示例#1
0
    def test_pipeline_savefig_poof(self):
        """
        Check that poof(outdir=...) hands each asserted step an outpath
        made of the output directory and "<step name>.pdf"
        """
        steps = [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockVisualEstimator())),
        ]
        pipeline = VisualPipeline(steps)

        # Join path components with os.path.join so the expected values use
        # the platform separator (Windows compatibility).
        outdir = os.path.join("tmp", "figures")
        pipeline.poof(outdir=outdir)

        # (position in pipeline.steps, step name) of every step whose poof
        # call is verified.
        for index, name in ((1, "b"), (3, "d"), (4, "e")):
            expected = os.path.join(outdir, name + ".pdf")
            pipeline.steps[index][1].poof.assert_called_once_with(outpath=expected)
示例#2
0
    def test_fit_transform_poof_and_draw_calls(self):
        """
        Run fit, transform, draw and poof on the pipeline and verify the
        calls received by the steps
        """
        # Steps excluded from the draw/poof assertions.
        skipped = {'a', 'c', 'e'}

        X = [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]
        y = [1, 2, 3, 4, 5]

        pipeline = VisualPipeline([
            ('a', MockTransformer()),
            ('b', MockVisualTransformer()),
            ('c', MockTransformer()),
            ('d', MockVisualTransformer()),
            ('e', MockEstimator()),
        ])

        # fit is expected on every step with the original X and y
        pipeline.fit(X, y)
        for step in pipeline.named_steps.values():
            step.fit.assert_called_once_with(X, y)

        # transform is expected on every step with the original X
        pipeline.transform(X)
        for step in pipeline.named_steps.values():
            step.transform.assert_called_once_with(X)

        # draw is only verified on the steps that are not skipped
        pipeline.draw()
        for name, step in pipeline.named_steps.items():
            if name not in skipped:
                step.draw.assert_called_once_with()

        # poof is only verified on the steps that are not skipped
        pipeline.poof()
        for name, step in pipeline.named_steps.items():
            if name not in skipped:
                step.poof.assert_called_once_with()
示例#3
0
    def test_validate_steps(self):
        """
        Assert that visual transformers can be added to pipelines
        """

        # Pipeline objects have a _validate_steps method that raises a
        # TypeError if the steps don't match transforms --> estimator.
        pipeline_types = (Pipeline, VisualPipeline)

        # a bad intermediate transformer must be rejected by the Pipeline
        # and then by the VisualPipeline
        for pipeline_type in pipeline_types:
            with pytest.raises(TypeError):
                pipeline_type(
                    [
                        ("real", MockTransformer()),
                        ("bad", Thing()),
                        ("model", MockEstimator()),
                    ]
                )

        # a bad final estimator must be rejected by the Pipeline and then
        # by the VisualPipeline
        for pipeline_type in pipeline_types:
            with pytest.raises(TypeError):
                pipeline_type([("real", MockTransformer()), ("bad", Thing())])

        # visual transformers must be accepted by both pipeline types; each
        # type is paired with the failure message reported if it is not
        accepted_cases = (
            (Pipeline, "could not add a visual transformer to a Pipeline!"),
            (
                VisualPipeline,
                "could not add a visual transformer to a VisualPipeline!",
            ),
        )
        for pipeline_type, failure_message in accepted_cases:
            try:
                pipeline_type(
                    [
                        ("real", MockTransformer()),
                        ("visual", MockVisualTransformer()),
                        ("model", MockEstimator()),
                    ]
                )
            except TypeError:
                self.fail(failure_message)
    def test_fit_transform_show_and_draw_calls(self):
        """
        Run fit, transform and show on the pipeline and verify the calls
        received by the steps
        """
        # Steps excluded from the show assertions.
        skip_show = {"a", "c", "e"}

        X = [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]
        y = [1, 2, 3, 4, 5]

        pipeline = VisualPipeline([
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ])

        # fit is expected on every step with the original X and y
        pipeline.fit(X, y)
        for step in pipeline.named_steps.values():
            step.fit.assert_called_once_with(X, y)

        # transform is verified on every step except the final one ("e")
        pipeline.transform(X)
        for name, step in pipeline.named_steps.items():
            if name != "e":
                step.transform.assert_called_once_with(X)

        # show is verified, without an output path, on the remaining steps
        pipeline.show()
        for name, step in pipeline.named_steps.items():
            if name not in skip_show:
                step.show.assert_called_once_with(outpath=None)
    def test_pipeline_show(self):
        """
        Check that VisualPipeline.show calls show on the steps at
        positions 1 and 3 with no output path
        """
        steps = [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
        pipeline = VisualPipeline(steps)

        pipeline.show()

        # Positions 1 and 3 hold the "b" and "d" steps.
        for index in (1, 3):
            _, step = pipeline.steps[index]
            step.show.assert_called_once_with(outpath=None)
示例#6
0
    def test_pipeline_savefig_poof(self):
        """
        Test the poof call with an outdir to save all the figures

        The expected outpath of each verified step is the output directory
        joined with "<step name>.pdf".
        """
        # Local import so this test does not depend on a module-level
        # ``import os`` being present.
        import os

        pipeline = VisualPipeline([
            ('a', mock.MagicMock(MockTransformer())),
            ('b', mock.MagicMock(MockVisualTransformer())),
            ('c', mock.MagicMock(MockTransformer())),
            ('d', mock.MagicMock(MockVisualTransformer())),
            ('e', mock.MagicMock(MockVisualEstimator())),
        ])

        outdir = "/tmp/figures"
        pipeline.poof(outdir=outdir)

        # Must use path joining for Windows compatibility: a hard-coded "/"
        # between the directory and the file name only matches on POSIX.
        # NOTE(review): assumes the pipeline builds outpath with
        # os.path.join -- confirm against VisualPipeline.poof.
        for index, name in ((1, 'b'), (3, 'd'), (4, 'e')):
            expected = os.path.join(outdir, name + ".pdf")
            pipeline.steps[index][1].poof.assert_called_once_with(outpath=expected)
示例#7
0
    def test_pipeline_poof(self):
        """
        Check that VisualPipeline.poof calls poof on the steps at
        positions 1 and 3 with no output path
        """
        steps = [
            ('a', mock.MagicMock(MockTransformer())),
            ('b', mock.MagicMock(MockVisualTransformer())),
            ('c', mock.MagicMock(MockTransformer())),
            ('d', mock.MagicMock(MockVisualTransformer())),
            ('e', mock.MagicMock(MockEstimator())),
        ]
        pipeline = VisualPipeline(steps)

        pipeline.poof()

        # Positions 1 and 3 hold the 'b' and 'd' steps.
        for index in (1, 3):
            _, step = pipeline.steps[index]
            step.poof.assert_called_once_with(outpath=None)
    def test_pipeline_poof(self):
        """
        Verify that poof on the VisualPipeline reaches the 'b' and 'd'
        steps with outpath=None
        """
        pipeline = VisualPipeline([
            ('a', mock.MagicMock(MockTransformer())),
            ('b', mock.MagicMock(MockVisualTransformer())),
            ('c', mock.MagicMock(MockTransformer())),
            ('d', mock.MagicMock(MockVisualTransformer())),
            ('e', mock.MagicMock(MockEstimator())),
        ])
        pipeline.poof()

        # Pull the two verified steps out of the (name, step) pairs.
        _, step_b = pipeline.steps[1]
        _, step_d = pipeline.steps[3]

        step_b.poof.assert_called_once_with(outpath=None)
        step_d.poof.assert_called_once_with(outpath=None)
    def test_fit_transform_poof_and_draw_calls(self):
        """
        Run fit, transform and poof on the pipeline and verify the calls
        received by the steps
        """
        # Steps excluded from the poof assertions.
        skip_poof = {'a', 'c', 'e'}

        X = [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]
        y = [1, 2, 3, 4, 5]

        pipeline = VisualPipeline([
            ('a', mock.MagicMock(MockTransformer())),
            ('b', mock.MagicMock(MockVisualTransformer())),
            ('c', mock.MagicMock(MockTransformer())),
            ('d', mock.MagicMock(MockVisualTransformer())),
            ('e', mock.MagicMock(MockEstimator())),
        ])

        # fit is expected on every step with the original X and y
        pipeline.fit(X, y)
        for step in pipeline.named_steps.values():
            step.fit.assert_called_once_with(X, y)

        # transform is verified on every step except the final one ('e')
        pipeline.transform(X)
        for name, step in pipeline.named_steps.items():
            if name != 'e':
                step.transform.assert_called_once_with(X)

        # poof is verified, without an output path, on the remaining steps
        pipeline.poof()
        for name, step in pipeline.named_steps.items():
            if name not in skip_poof:
                step.poof.assert_called_once_with(outpath=None)
    def test_pipeline_savefig_poof(self):
        """
        Verify that poof(outdir=...) passes "<outdir>/<step name>.pdf"
        style paths to the steps that are checked
        """
        # Must use path joining for Windows compatibility
        tmpdir = os.path.join("tmp", "figures")

        # Expected outpath for each verified position in pipeline.steps.
        expected_paths = {
            1: os.path.join(tmpdir, "b.pdf"),
            3: os.path.join(tmpdir, "d.pdf"),
            4: os.path.join(tmpdir, "e.pdf"),
        }

        pipeline = VisualPipeline([
            ('a', mock.MagicMock(MockTransformer())),
            ('b', mock.MagicMock(MockVisualTransformer())),
            ('c', mock.MagicMock(MockTransformer())),
            ('d', mock.MagicMock(MockVisualTransformer())),
            ('e', mock.MagicMock(MockVisualEstimator())),
        ])
        pipeline.poof(outdir=tmpdir)

        for index in (1, 3, 4):
            step = pipeline.steps[index][1]
            step.poof.assert_called_once_with(outpath=expected_paths[index])
def selectDiscr(data_path="labeled_data.csv"):
    """
    Fit a preprocessing + DiscriminationThreshold pipeline on a labeled CSV
    file and call poof() on the fitted pipeline.

    The numeric columns are median-imputed and standard-scaled; the
    categorical column is imputed with the constant 'missing' and one-hot
    encoded. The pipeline is fitted on the training part of a random
    80/20 split of the data.

    Parameters
    ----------
    data_path : str, default "labeled_data.csv"
        Path of the CSV file to read. The code uses the columns
        'count_reviews', 'rating', 'product_category' and 'label'.
    """
    data = pd.read_csv(data_path)

    # Preprocessing pipeline for the numeric columns.
    numeric_features = ['count_reviews', 'rating']
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    # Preprocessing pipeline for the categorical columns.
    categorical_features = ['product_category']
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

    # The visualizer wraps the classifier and is the final pipeline step.
    viz = DiscriminationThreshold(LogisticRegression())
    clf = VisualPipeline(steps=[
        ('preprocessor', preprocessor),
        ('viz', viz),
    ])

    X = data.drop('label', axis=1)
    y = data['label']

    # Only the training part of the split is used by this function.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)

    model = clf.fit(X_train, y_train)
    model.poof()
示例#12
0
    def test_visual_steps_property(self):
        """
        Check that the visual_steps property contains the 'b' and 'd' steps
        """
        steps = [
            ('a', MockTransformer()),
            ('b', VisualTransformerSpec()),
            ('c', MockTransformer()),
            ('d', VisualTransformerSpec()),
            ('e', MockEstimator()),
        ]
        pipeline = VisualPipeline(steps)

        # 'b' and 'd' are the steps built from VisualTransformerSpec.
        for name in ('b', 'd'):
            self.assertIn(name, pipeline.visual_steps)
    def test_visual_steps_property(self):
        """
        Check which step names the visual_steps property does and does not
        contain
        """
        # Step name -> whether it is expected in pipeline.visual_steps.
        expected_membership = (
            ("a", False),
            ("b", True),
            ("c", False),
            ("d", True),
            ("e", False),
        )

        pipeline = VisualPipeline([
            ("a", MockTransformer()),
            ("b", MockVisualTransformer()),
            ("c", MockTransformer()),
            ("d", MockVisualTransformer()),
            ("e", MockEstimator()),
        ])

        for name, is_visual in expected_membership:
            if is_visual:
                assert name in pipeline.visual_steps
            else:
                assert name not in pipeline.visual_steps
示例#14
0
def modelSelection(data_path="labeled_data.csv",
                   model_path="model_products.sav"):
    """
    Train a LogisticRegression classification-report pipeline on a labeled
    CSV file, print its score and accuracy, and save the fitted pipeline.

    The numeric columns are median-imputed and standard-scaled; the
    categorical column is imputed with the constant 'missing' and one-hot
    encoded. The pipeline is fitted on the training part of a random
    80/20 split and evaluated on the held-out part.

    Parameters
    ----------
    data_path : str, default "labeled_data.csv"
        Path of the CSV file to read. The code uses the columns
        'count_reviews', 'rating', 'product_category' and 'label'.
    model_path : str, default "model_products.sav"
        Destination passed to joblib.dump for the fitted pipeline.
    """
    data = pd.read_csv(data_path)

    # Preprocessing pipeline for the numeric columns.
    numeric_features = ['count_reviews', 'rating']
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    # Preprocessing pipeline for the categorical columns.
    categorical_features = ['product_category']
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

    X = data.drop('label', axis=1)
    y = data['label']

    # Open a fresh figure with a single axes before the visualizer is built.
    # NOTE(review): presumably the visualizer draws on the current axes --
    # confirm, or pass the axes to ClassificationReport explicitly.
    fig = plt.figure()
    fig.add_subplot()

    viz_logistic = ClassificationReport(
        LogisticRegression(),
        classes=['not recommended', 'recommended'],
        support=True)

    # Visual Pipeline is used to visualize the report
    clf_logistic = VisualPipeline(steps=[
        ('preprocessor', preprocessor),
        ('viz', viz_logistic),
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model_logistic = clf_logistic.fit(X_train, y_train)
    preds_logistic = clf_logistic.predict(X_test)

    print("LogisticRegression model score: %.3f" %
          clf_logistic.score(X_test, y_test))
    clf_logistic.poof()

    # Evaluate accuracy
    print("LogisticRegression accuracy: ",
          accuracy_score(y_test, preds_logistic))

    # Build the per-row results on a new frame: assigning columns directly
    # to X_test would mutate a slice produced by train_test_split and can
    # trigger pandas' SettingWithCopyWarning. The frame is assembled for
    # inspection while debugging; it is not returned or persisted.
    final_predictions = X_test.assign(target=y_test,
                                      prediction=preds_logistic)

    joblib.dump(model_logistic, model_path)