def test_fit_transform_poof_and_draw_calls(self):
    """
    Test calling fit, transform, and poof on the pipeline

    Every step is a MagicMock, so the test only checks which methods the
    pipeline forwards to each step and with which arguments.
    """
    pipeline = VisualPipeline(
        [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockEstimator())),
        ]
    )

    # Three samples of five features each, with exactly one target per
    # sample so the fixture is a valid (X, y) pair.
    X = [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]
    y = [1, 2, 3]

    pipeline.fit(X, y)
    for step in pipeline.named_steps.values():
        step.fit.assert_called_once_with(X, y)

    pipeline.transform(X)
    for name, step in pipeline.named_steps.items():
        # 'e' is the last step and wraps MockEstimator; it is not
        # checked for a transform call.
        if name == "e":
            continue
        step.transform.assert_called_once_with(X)

    pipeline.poof()
    for name, step in pipeline.named_steps.items():
        # Only 'b' and 'd' wrap MockVisualTransformer; the other steps
        # are not checked for a poof call.
        if name in {"a", "c", "e"}:
            continue
        step.poof.assert_called_once_with(outpath=None)
def test_pipeline_savefig_poof(self):
    """
    Check that poof(outdir=...) gives each visual step its own PDF path
    """
    steps = [
        ("a", mock.MagicMock(MockTransformer())),
        ("b", mock.MagicMock(MockVisualTransformer())),
        ("c", mock.MagicMock(MockTransformer())),
        ("d", mock.MagicMock(MockVisualTransformer())),
        ("e", mock.MagicMock(MockVisualEstimator())),
    ]
    pipeline = VisualPipeline(steps)

    # Join path components with os.path so the test also holds on Windows
    outdir = os.path.join("tmp", "figures")
    pipeline.poof(outdir=outdir)

    # (position in the pipeline, expected file name) for each visual step
    for position, filename in ((1, "b.pdf"), (3, "d.pdf"), (4, "e.pdf")):
        visual_step = pipeline.steps[position][1]
        visual_step.poof.assert_called_once_with(
            outpath=os.path.join(outdir, filename)
        )
def test_pipeline_savefig_poof(self):
    """
    Test the poof call with an outdir to save all the figures
    """
    # Function-scope import so the test does not rely on a module-level
    # ``import os`` being present.
    import os

    pipeline = VisualPipeline(
        [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockVisualEstimator())),
        ]
    )

    outdir = "/tmp/figures"
    pipeline.poof(outdir=outdir)

    # Build the expected paths with os.path.join instead of hard-coding
    # "/" so the assertions do not depend on the platform's separator.
    # NOTE(review): presumably VisualPipeline.poof joins outdir and
    # "<step name>.pdf" the same way -- confirm against its implementation.
    pipeline.steps[1][1].poof.assert_called_once_with(
        outpath=os.path.join(outdir, "b.pdf")
    )
    pipeline.steps[3][1].poof.assert_called_once_with(
        outpath=os.path.join(outdir, "d.pdf")
    )
    pipeline.steps[4][1].poof.assert_called_once_with(
        outpath=os.path.join(outdir, "e.pdf")
    )
def test_pipeline_poof(self):
    """
    Check that a bare poof() call reaches the visual steps of the pipeline
    """
    steps = [
        ("a", mock.MagicMock(MockTransformer())),
        ("b", mock.MagicMock(MockVisualTransformer())),
        ("c", mock.MagicMock(MockTransformer())),
        ("d", mock.MagicMock(MockVisualTransformer())),
        ("e", mock.MagicMock(MockEstimator())),
    ]
    pipeline = VisualPipeline(steps)

    pipeline.poof()

    # Positions 1 and 3 hold the steps that wrap MockVisualTransformer
    for position in (1, 3):
        _, visual_step = pipeline.steps[position]
        visual_step.poof.assert_called_once_with(outpath=None)
def test_pipeline_savefig_poof(self):
    """
    Check that poof(outdir=...) asks every visual step to save into outdir
    """
    pipeline = VisualPipeline(
        [
            ("a", mock.MagicMock(MockTransformer())),
            ("b", mock.MagicMock(MockVisualTransformer())),
            ("c", mock.MagicMock(MockTransformer())),
            ("d", mock.MagicMock(MockVisualTransformer())),
            ("e", mock.MagicMock(MockVisualEstimator())),
        ]
    )

    # Must use path joining for Windows compatibility
    figures_dir = os.path.join("tmp", "figures")
    pipeline.poof(outdir=figures_dir)

    # Steps 'b', 'd' and 'e' are the visual ones; each is expected to be
    # handed "<its own name>.pdf" inside the output directory.
    for index in (1, 3, 4):
        step_name, step = pipeline.steps[index]
        expected_path = os.path.join(figures_dir, step_name + ".pdf")
        step.poof.assert_called_once_with(outpath=expected_path)
def modelSelection(data_path="labeled_data.csv", model_path="model_products.sav"):
    """
    Train a LogisticRegression classification pipeline, report on it and
    save it to disk.

    The function reads the labelled CSV, builds a preprocessing +
    ClassificationReport VisualPipeline, fits it on 80% of the rows,
    prints the score and accuracy measured on the remaining 20%, shows
    the report with ``poof()`` and dumps the fitted pipeline with joblib.

    The train/test split is not seeded here, so the printed numbers can
    differ from one run to the next.

    Parameters
    ----------
    data_path : str, default "labeled_data.csv"
        CSV file to load. The columns used are 'count_reviews', 'rating',
        'product_category' and 'label'.
    model_path : str, default "model_products.sav"
        File that ``joblib.dump`` writes the fitted pipeline to.
    """
    data = pd.read_csv(data_path)

    # Preprocessing pipelines for both numeric and categorical data.
    numeric_features = ["count_reviews", "rating"]
    numeric_transformer = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )

    categorical_features = ["product_category"]
    categorical_transformer = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
            ("onehot", OneHotEncoder(handle_unknown="ignore")),
        ]
    )

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ]
    )

    X = data.drop("label", axis=1)
    y = data["label"]

    # Open a fresh figure with a single axes before building the visualizer.
    # NOTE(review): the axes is never handed to the visualizer explicitly;
    # presumably it draws on the current matplotlib axes -- confirm.
    plt.figure().add_subplot()

    viz_logistic = ClassificationReport(
        LogisticRegression(),
        classes=["not recommended", "recommended"],
        support=True,
    )

    # Visual Pipeline is used to visualize the report
    clf_logistic = VisualPipeline(
        steps=[
            ("preprocessor", preprocessor),
            ("viz", viz_logistic),
        ]
    )

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model_logistic = clf_logistic.fit(X_train, y_train)
    preds_logistic = clf_logistic.predict(X_test)

    print("LogisticRegression model score: %.3f" % clf_logistic.score(X_test, y_test))

    clf_logistic.poof()

    # Evaluate accuracy
    print("LogisticRegression accuracy: ", accuracy_score(y_test, preds_logistic))

    joblib.dump(model_logistic, model_path)