def test_experiment_loadsavemodel(self):
    """Check that a pipeline yields the same metrics after save/load.

    A OneHotVectorizer + FastTreesBinaryClassifier pipeline is fitted and
    evaluated, saved to a temporary file, loaded into a fresh Pipeline and
    evaluated again on the same data. The grand totals of the two metrics
    tables must be equal.
    """
    (train, label) = get_X_y(train_file, label_column, sep=',')
    (test, label1) = get_X_y(test_file, label_column, sep=',')

    cat = OneHotVectorizer() << categorical_columns
    ftree = FastTreesBinaryClassifier()
    pipeline = Pipeline([cat, ftree])
    pipeline.fit(train, label)
    metrics1, _ = pipeline.test(test, label1, 'binary', output_scores=True)
    sum1 = metrics1.sum().sum()

    # mkstemp hands back an open descriptor; only the path is needed, so
    # close the descriptor immediately.
    (fd, modelfilename) = tempfile.mkstemp(suffix='.model.bin')
    os.close(fd)
    try:
        pipeline.save_model(modelfilename)

        pipeline2 = Pipeline()
        pipeline2.load_model(modelfilename)
        # NOTE(review): the loaded pipeline presumably keeps referring to
        # the file on disk, so it is evaluated before the file is removed.
        metrics2, _ = pipeline2.test(
            test, label1, 'binary', output_scores=True)
        sum2 = metrics2.sum().sum()
    finally:
        # Do not leave the temporary model behind, even on failure.
        os.remove(modelfilename)

    assert_equal(sum1, sum2,
                 "model metrics don't match after loading model")
def test_pipeline_loaded_from_zip_has_feature_contributions(self):
    """Check feature contributions survive a save/load round trip via zip.

    Feature contributions are computed with the fitted pipeline and again
    with a pipeline loaded from the saved '.zip' model. For every feature
    the 'FeatureContributions.<feature>' column must exist in the reloaded
    output and be equal to the column produced by the original pipeline.
    """
    features = ['age', 'education-num', 'hours-per-week']

    model_nimbusml = Pipeline(
        steps=[FastLinearBinaryClassifier(feature=features)])
    model_nimbusml.fit(train, label)
    fc = model_nimbusml.get_feature_contributions(test)

    model_filename = get_temp_file(suffix='.zip')
    try:
        # Save the model to zip
        model_nimbusml.save_model(model_filename)

        # Load the model from zip
        model_nimbusml_zip = Pipeline()
        model_nimbusml_zip.load_model(model_filename)
        fc_zip = model_nimbusml_zip.get_feature_contributions(test)

        # Assert per feature: asserting on a list of booleans would always
        # pass, because a non-empty list is truthy whatever it contains.
        for feature in features:
            column = 'FeatureContributions.' + feature
            assert column in fc_zip.columns, \
                "missing column after loading model: " + column
            assert fc[column].equals(fc_zip[column]), \
                "contributions differ after loading model: " + column
    finally:
        # Clean up even when saving, loading or an assertion fails.
        if os.path.exists(model_filename):
            os.remove(model_filename)
def test_model_datastream(self):
    """Check a fitted pipeline predicts the same after pickle and save/load.

    The pipeline (OneHotVectorizer + FastLinearBinaryClassifier) is fitted
    once and then round-tripped twice: through pickle, and through
    Pipeline.save_model / Pipeline.load_model. After each round trip the
    summed first five predictions and the summed metrics must match the
    original pipeline to two decimals.
    """
    model_nimbusml = Pipeline(
        steps=[
            ('cat', OneHotVectorizer() << categorical_columns),
            ('linear', FastLinearBinaryClassifier(
                shuffle=False, number_of_threads=1))])
    model_nimbusml.fit(train, label)

    # Save with pickle
    pickle_filename = get_temp_file(suffix='.p')
    try:
        with open(pickle_filename, 'wb') as f:
            pickle.dump(model_nimbusml, f)
        with open(pickle_filename, "rb") as f:
            model_nimbusml_pickle = pickle.load(f)
    finally:
        # Remove the pickle even if dumping or loading raised.
        if os.path.exists(pickle_filename):
            os.remove(pickle_filename)

    score1 = model_nimbusml.predict(test).head(5)
    score2 = model_nimbusml_pickle.predict(test).head(5)

    metrics, score = model_nimbusml.test(test, test_label,
                                         output_scores=True)
    metrics_pickle, score_pickle = model_nimbusml_pickle.test(
        test, test_label, output_scores=True)

    assert_almost_equal(score1.sum().sum(), score2.sum().sum(), decimal=2)
    assert_almost_equal(
        metrics.sum().sum(), metrics_pickle.sum().sum(), decimal=2)

    # Save load with pipeline methods
    model_filename = get_temp_file(suffix='.m')
    try:
        model_nimbusml.save_model(model_filename)
        model_nimbusml_load = Pipeline()
        model_nimbusml_load.load_model(model_filename)

        score1 = model_nimbusml.predict(test).head(5)
        score2 = model_nimbusml_load.predict(test).head(5)

        metrics, score = model_nimbusml.test(test, test_label,
                                             output_scores=True)
        # Keep the metrics under their own name instead of rebinding the
        # variable that holds the loaded pipeline.
        # NOTE(review): evaltype is given explicitly only for the loaded
        # pipeline -- presumably it cannot be inferred after load_model;
        # confirm.
        metrics_load, score_load = model_nimbusml_load.test(
            test, test_label, evaltype='binary', output_scores=True)

        assert_almost_equal(
            score1.sum().sum(), score2.sum().sum(), decimal=2)
        assert_almost_equal(
            metrics.sum().sum(), metrics_load.sum().sum(), decimal=2)
    finally:
        # The model file is removed only after the loaded pipeline's last
        # use, and also when any step above fails.
        if os.path.exists(model_filename):
            os.remove(model_filename)
def test_pass_predict_proba_from_load_model(self):
    """Check predict_proba gives the same result after save/load.

    A LogisticRegressionBinaryClassifier pipeline is fitted, its
    probabilities on X_test are summed, the model is saved to a temporary
    file and loaded into a fresh Pipeline, and the summed probabilities of
    the loaded pipeline must equal the original sum.
    """
    pipeline = Pipeline([LogisticRegressionBinaryClassifier()])
    pipeline.fit(X_train, y_train)

    probs1 = pipeline.predict_proba(X_test)
    sum1 = probs1.sum().sum()

    # mkstemp hands back an open descriptor; only the path is needed, so
    # close the descriptor immediately.
    (fd, modelfilename) = tempfile.mkstemp(suffix='.model.bin')
    os.close(fd)
    try:
        pipeline.save_model(modelfilename)

        pipeline2 = Pipeline()
        pipeline2.load_model(modelfilename)
        # NOTE(review): the loaded pipeline presumably keeps referring to
        # the file on disk, so it is used before the file is removed.
        probs2 = pipeline2.predict_proba(X_test)
        sum2 = probs2.sum().sum()
    finally:
        # Do not leave the temporary model behind, even on failure.
        os.remove(modelfilename)

    assert_equal(sum1, sum2,
                 "model probabilities don't match after loading model")
def test_model_datastream(self):
    """Check a fitted pipeline predicts the same after pickle and save/load.

    The pipeline (OneHotVectorizer + FastLinearBinaryClassifier) is fitted
    once and then round-tripped twice: through pickle, and through
    Pipeline.save_model / Pipeline.load_model. After each round trip the
    summed first five predictions and the summed metrics must match the
    original pipeline to two decimals.

    All artefacts are written inside a private temporary directory that is
    deleted when the test finishes, so nothing is left in the working
    directory and no file handle stays open.
    """
    # Local imports keep this block self-contained.
    import os
    import shutil
    import tempfile

    model_nimbusml = Pipeline(
        steps=[('cat', OneHotVectorizer() << categorical_columns),
               ('linear', FastLinearBinaryClassifier(shuffle=False,
                                                     train_threads=1))])
    model_nimbusml.fit(train, label)

    work_dir = tempfile.mkdtemp()
    try:
        # Save with pickle
        pickle_filename = os.path.join(work_dir, 'nimbusml_model.p')
        with open(pickle_filename, 'wb') as f:
            pickle.dump(model_nimbusml, f)
        with open(pickle_filename, "rb") as f:
            model_nimbusml_pickle = pickle.load(f)

        score1 = model_nimbusml.predict(test).head(5)
        score2 = model_nimbusml_pickle.predict(test).head(5)

        metrics, score = model_nimbusml.test(test, test_label,
                                             output_scores=True)
        metrics_pickle, score_pickle = model_nimbusml_pickle.test(
            test, test_label, output_scores=True)

        assert_almost_equal(score1.sum().sum(), score2.sum().sum(),
                            decimal=2)
        assert_almost_equal(metrics.sum().sum(),
                            metrics_pickle.sum().sum(),
                            decimal=2)

        # Save load with pipeline methods
        model_filename = os.path.join(work_dir, 'model.nimbusml.m')
        model_nimbusml.save_model(model_filename)
        model_nimbusml_load = Pipeline()
        model_nimbusml_load.load_model(model_filename)

        score1 = model_nimbusml.predict(test).head(5)
        score2 = model_nimbusml_load.predict(test).head(5)

        metrics, score = model_nimbusml.test(test, test_label,
                                             output_scores=True)
        # Keep the metrics under their own name instead of rebinding the
        # variable that holds the loaded pipeline.
        metrics_load, score_load = model_nimbusml_load.test(
            test, test_label, evaltype='binary', output_scores=True)

        assert_almost_equal(score1.sum().sum(), score2.sum().sum(),
                            decimal=2)
        assert_almost_equal(metrics.sum().sum(),
                            metrics_load.sum().sum(),
                            decimal=2)
    finally:
        # Best-effort cleanup; never mask a test failure with a cleanup
        # error.
        shutil.rmtree(work_dir, ignore_errors=True)