예제 #1
0
    def test_fit_transform_poof_and_draw_calls(self):
        """
        Verify that fit, transform, and poof on the pipeline reach its steps.

        Asserts one ``fit(X, y)`` call on every step, one ``transform(X)``
        call on every step except 'e', and one ``poof(outpath=None)`` call
        on steps 'b' and 'd'.
        """
        steps = [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
        pipeline = VisualPipeline(steps)

        # Three rows of five identical values, and five target values.
        X = [[value] * 5 for value in (1, 2, 3)]
        y = list(range(1, 6))

        # fit is expected on every step.
        pipeline.fit(X, y)
        for step in pipeline.named_steps.values():
            step.fit.assert_called_once_with(X, y)

        # transform is checked on every step but 'e'.
        pipeline.transform(X)
        for name, step in pipeline.named_steps.items():
            if name != "e":
                step.transform.assert_called_once_with(X)

        # poof is only checked on steps 'b' and 'd'.
        pipeline.poof()
        for name, step in pipeline.named_steps.items():
            if name not in {"a", "c", "e"}:
                step.poof.assert_called_once_with(outpath=None)
예제 #2
0
    def test_pipeline_savefig_poof(self):
        """
        Check that poof(outdir=...) hands each checked step its own PDF path.

        Steps 'b', 'd' and 'e' must each receive a single poof call whose
        ``outpath`` is the output directory joined with "<step name>.pdf".
        """
        steps = [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockVisualEstimator())),
        ]
        pipeline = VisualPipeline(steps)

        # Build every path with os.path.join so the test also holds on Windows.
        tmpdir = os.path.join("tmp", "figures")

        pipeline.poof(outdir=tmpdir)

        # (position in pipeline.steps, step name) for each step that is checked.
        for position, name in ((1, "b"), (3, "d"), (4, "e")):
            expected = os.path.join(tmpdir, name + ".pdf")
            visualizer = pipeline.steps[position][1]
            visualizer.poof.assert_called_once_with(outpath=expected)
예제 #3
0
    def test_fit_transform_poof_and_draw_calls(self):
        """
        Exercise fit, transform, and poof on a pipeline of mocked steps.

        The mocks record their calls; the test then asserts which steps
        were called exactly once, and with which arguments, by each of the
        three pipeline methods.
        """
        # Steps excluded from the transform check and from the poof check.
        skip_transform = {"e"}
        skip_poof = {"a", "c", "e"}

        pipeline = VisualPipeline(
            [
                ("a", mock.MagicMock(MockTransformer())),
                ("b", mock.MagicMock(MockVisualTransformer())),
                ("c", mock.MagicMock(MockTransformer())),
                ("d", mock.MagicMock(MockVisualTransformer())),
                ("e", mock.MagicMock(MockEstimator())),
            ]
        )

        X = [
            [1, 1, 1, 1, 1],
            [2, 2, 2, 2, 2],
            [3, 3, 3, 3, 3],
        ]
        y = [1, 2, 3, 4, 5]

        pipeline.fit(X, y)
        for step in pipeline.named_steps.values():
            step.fit.assert_called_once_with(X, y)

        pipeline.transform(X)
        for name, step in pipeline.named_steps.items():
            if name in skip_transform:
                continue
            step.transform.assert_called_once_with(X)

        pipeline.poof()
        for name, step in pipeline.named_steps.items():
            if name in skip_poof:
                continue
            step.poof.assert_called_once_with(outpath=None)
예제 #4
0
    def test_pipeline_savefig_poof(self):
        """
        Test the poof call with an outdir to save all the figures

        Steps 'b', 'd' and 'e' must each receive exactly one poof call whose
        ``outpath`` is the output directory joined with "<step name>.pdf".
        """
        # Local import keeps this method self-contained.
        import os

        pipeline = VisualPipeline([
            ('a', mock.MagicMock(MockTransformer())),
            ('b', mock.MagicMock(MockVisualTransformer())),
            ('c', mock.MagicMock(MockTransformer())),
            ('d', mock.MagicMock(MockVisualTransformer())),
            ('e', mock.MagicMock(MockVisualEstimator()),)
        ])

        outdir = "/tmp/figures"
        pipeline.poof(outdir=outdir)

        # Expected paths are built with os.path.join instead of hard-coding
        # "/" so they use the platform separator (Windows compatibility);
        # on POSIX the values are the same strings as before.
        pipeline.steps[1][1].poof.assert_called_once_with(
            outpath=os.path.join(outdir, "b.pdf")
        )
        pipeline.steps[3][1].poof.assert_called_once_with(
            outpath=os.path.join(outdir, "d.pdf")
        )
        pipeline.steps[4][1].poof.assert_called_once_with(
            outpath=os.path.join(outdir, "e.pdf")
        )
예제 #5
0
    def test_pipeline_poof(self):
        """
        Check that a bare poof() call reaches steps 'b' and 'd'.

        Both steps must be poofed exactly once with ``outpath=None``.
        """
        steps = [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
        pipeline = VisualPipeline(steps)

        pipeline.poof()

        # Positions 1 and 3 in pipeline.steps hold steps 'b' and 'd'.
        for position in (1, 3):
            visualizer = pipeline.steps[position][1]
            visualizer.poof.assert_called_once_with(outpath=None)
예제 #6
0
    def test_pipeline_poof(self):
        """
        Poof the VisualPipeline without arguments and inspect two steps.

        Steps 'b' and 'd' are each expected to have been poofed once with
        ``outpath=None``.
        """
        pipeline = VisualPipeline(
            [
                ("a", mock.MagicMock(MockTransformer())),
                ("b", mock.MagicMock(MockVisualTransformer())),
                ("c", mock.MagicMock(MockTransformer())),
                ("d", mock.MagicMock(MockVisualTransformer())),
                ("e", mock.MagicMock(MockEstimator())),
            ]
        )

        pipeline.poof()

        # Each entry of pipeline.steps is a (name, step) pair.
        _, step_b = pipeline.steps[1]
        _, step_d = pipeline.steps[3]
        step_b.poof.assert_called_once_with(outpath=None)
        step_d.poof.assert_called_once_with(outpath=None)
예제 #7
0
    def test_pipeline_savefig_poof(self):
        """
        Poof the pipeline into an output directory and check the file paths.

        Steps 'b', 'd' and 'e' are each expected to be poofed once, with
        ``outpath`` set to "<name>.pdf" inside the output directory.
        """
        pipeline = VisualPipeline(
            [
                ("a", mock.MagicMock(MockTransformer())),
                ("b", mock.MagicMock(MockVisualTransformer())),
                ("c", mock.MagicMock(MockTransformer())),
                ("d", mock.MagicMock(MockVisualTransformer())),
                ("e", mock.MagicMock(MockVisualEstimator())),
            ]
        )

        # Paths are joined rather than hard-coded for Windows compatibility.
        tmpdir = os.path.join("tmp", "figures")
        pipeline.poof(outdir=tmpdir)

        # Each entry of pipeline.steps is a (name, step) pair.
        _, step_b = pipeline.steps[1]
        _, step_d = pipeline.steps[3]
        _, step_e = pipeline.steps[4]

        path_b = os.path.join(tmpdir, "b.pdf")
        path_d = os.path.join(tmpdir, "d.pdf")
        path_e = os.path.join(tmpdir, "e.pdf")

        step_b.poof.assert_called_once_with(outpath=path_b)
        step_d.poof.assert_called_once_with(outpath=path_d)
        step_e.poof.assert_called_once_with(outpath=path_e)
예제 #8
0
def modelSelection(data_path="labeled_data.csv",
                   model_path='model_products.sav'):
    """
    Train, evaluate, visualize and persist a logistic-regression pipeline.

    Reads the labeled CSV, builds a preprocessing + ClassificationReport
    visual pipeline, fits it on an 80/20 train/test split, prints the test
    score and accuracy, shows the report, and dumps the fitted pipeline
    with joblib.

    Parameters
    ----------
    data_path : str
        CSV file to load. It must contain the columns 'count_reviews',
        'rating', 'product_category' and 'label'.
    model_path : str
        File the fitted pipeline is written to.

    Returns
    -------
    None
    """
    data = pd.read_csv(data_path)

    # Numeric columns: fill gaps with the median, then standardize.
    numeric_features = ['count_reviews', 'rating']
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    # Categorical columns: fill gaps with the constant 'missing', then
    # one-hot encode, ignoring categories not seen during fit.
    categorical_features = ['product_category']
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

    X = data.drop('label', axis=1)
    y = data['label']

    # Open a fresh figure with a single axes before the visualizer is built.
    # NOTE(review): the axes is not passed to the visualizer explicitly;
    # presumably it is picked up as the current axes - confirm.
    plt.figure().add_subplot()

    viz_logistic = ClassificationReport(
        LogisticRegression(),
        classes=['not recommended', 'recommended'],
        support=True)

    # Visual Pipeline is used to visualize the report
    clf_logistic = VisualPipeline(steps=[
        ('preprocessor', preprocessor),
        ('viz', viz_logistic),
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model_logistic = clf_logistic.fit(X_train, y_train)
    preds_logistic = clf_logistic.predict(X_test)

    print("LogisticRegression model score: %.3f" %
          clf_logistic.score(X_test, y_test))
    clf_logistic.poof()

    # Evaluate accuracy
    print("LogisticRegression accuracy: ",
          accuracy_score(y_test, preds_logistic))

    # Side-by-side view of features, true label and prediction, kept for
    # inspection while debugging. assign() returns a new frame, so X_test
    # itself is not mutated (the original aliased X_test and added columns
    # to it in place).
    final_predictions = X_test.assign(target=y_test,
                                      prediction=preds_logistic)

    joblib.dump(model_logistic, model_path)