def test_model_summary(self):
    """Fit a one-hot + learner pipeline for every learner and call summary().

    This is a smoke test: it makes no assertion on the summary contents and
    only requires that fitting and ``summary()`` complete without raising.
    ``learners``, ``categorical_columns``, ``train_file``, ``file_schema``
    and ``label_column`` are defined outside this method.
    """
    for current_learner in learners:
        # A fresh vectorizer is created for each pipeline so that no
        # transform instance is shared between learners.
        steps = [OneHotVectorizer() << categorical_columns, current_learner]
        model = Pipeline(steps)

        stream = FileDataStream(train_file, schema=file_schema)
        model.fit(stream, label_column)
        model.summary()
def test_pipeline_summary_is_refreshed_after_refitting(self):
    """Check that summary() changes after the pipeline is fitted again.

    The same pipeline is fitted twice on different data; the summary taken
    after the second fit must not equal the one taken after the first.
    """
    model = Pipeline([OrdinaryLeastSquaresRegressor()])

    first_features, first_labels = [0, 1, 2, 3], [1, 2, 3, 4]
    model.fit(first_features, first_labels)
    before_refit = model.summary()

    second_features, second_labels = [0, 1, 2.5, 3], [2, 5, 8, 11]
    model.fit(second_features, second_labels)
    after_refit = model.summary()

    # The summary objects are compared through their own .equals() method.
    summaries_match = before_refit.equals(after_refit)
    self.assertFalse(summaries_match)
def test_input_types(self):
    """Fit and predict from raw numpy values and check the predicted scores.

    The two feature columns each contain one NaN; the pipeline places a
    ``Handler(replace_with='Mean')`` ahead of a ``FastLinearRegressor``.
    Features are passed as ``.values`` arrays rather than as a DataFrame.
    """
    frame = DataFrame(data={
        'Label': [1, 2, 3, 4, 5],
        'f': [1.1, 2.2, 3.3, np.nan, 5.5],
        'f1': [2.2, np.nan, 4.4, 5.5, 6.6],
    })
    feature_names = ['f', 'f1']

    nan_handler = Handler(replace_with='Mean')
    regressor = FastLinearRegressor(shuffle=False, number_of_threads=1)
    model = Pipeline([nan_handler, regressor])

    # The feature array is extracted separately for fit and for predict.
    model.fit(frame[feature_names].values, frame['Label'])
    predictions = model.predict(frame[feature_names].values)

    print(predictions)
    print(model.summary())

    expected_scores = [4.965541, 0.519701, 4.992831, 3.877400, 5.020121]
    assert_allclose(predictions['Score'].values, expected_scores, rtol=1e-4)
def test_model_summary_not_supported_specific(self):
    """Check that summary() raises TypeError for an unsupported predictor.

    Fits a OneHotVectorizer + FactorizationMachineBinaryClassifier pipeline
    on the 'infert' dataset, then verifies that ``summary()`` raises a
    ``TypeError`` whose first argument is the expected message.
    """
    path = get_dataset('infert').as_filepath()
    data = FileDataStream.read_csv(
        path, sep=',', names={0: 'row_num', 5: 'case'})

    pipeline = Pipeline([
        OneHotVectorizer(columns={'edu': 'education'}),
        FactorizationMachineBinaryClassifier(
            feature=['induced', 'edu', 'parity'], label='case'),
    ])
    pipeline.fit(data)

    # assertRaises fails the test explicitly when no TypeError is raised.
    # A bare `assert False` would be stripped under `python -O` and let the
    # test pass silently in that case.
    with self.assertRaises(TypeError) as raised:
        pipeline.summary()

    self.assertEqual(
        raised.exception.args[0],
        "One or more predictors in this pipeline do not support the .summary() function."
    )
# Example: fit the same two-step pipeline from in-memory numpy arrays and
# from a CSV-backed FileDataStream, then print the model summary.
import numpy as np  # required for np.array below
import pandas as pd

from nimbusml import Pipeline, FileDataStream
from nimbusml.linear_model import FastLinearRegressor
from nimbusml.preprocessing.normalization import MeanVarianceScaler

X = np.array([[1, 2.0], [2, 4], [3, 0.7]])
Y = np.array([2, 3, 1.5])
df = pd.DataFrame(dict(y=Y, x1=X[:, 0], x2=X[:, 1]))

pipe = Pipeline([
    MeanVarianceScaler(),
    FastLinearRegressor()
])

# Fit with numpy arrays (features X, labels Y).
pipe.fit(X, Y)

# Fit with FileDataStream: write the frame to CSV and stream it back in.
df.to_csv('data.csv', index=False)
ds = FileDataStream.read_csv('data.csv', sep=',')

# A new pipeline is built for the second fit.
pipe = Pipeline([
    MeanVarianceScaler(),
    FastLinearRegressor()
])
pipe.fit(ds, 'y')  # 'y' names the label column in the CSV

print(pipe.summary())
#        Bias  Weights.x1  Weights.x2
# 0  1.032946    0.111758    1.210791