Пример #1
0
 def test_model_summary(self):
     """Fit a pipeline per learner and check that summary() runs.

     Each entry of ``learners`` is placed behind a OneHotVectorizer applied
     to ``categorical_columns``; the test passes when ``summary()`` returns
     without raising for every learner.
     """
     for estimator in learners:
         steps = [OneHotVectorizer() << categorical_columns, estimator]
         model = Pipeline(steps)
         # A new stream over the training file is created for each learner.
         stream = FileDataStream(train_file, schema=file_schema)
         model.fit(stream, label_column)
         model.summary()
Пример #2
0
    def test_pipeline_summary_is_refreshed_after_refitting(self):
        """Refitting the same pipeline on new data must change its summary."""
        model = Pipeline([OrdinaryLeastSquaresRegressor()])

        # First fit: record the summary produced by the initial data.
        model.fit([0,1,2,3], [1,2,3,4])
        first_summary = model.summary()

        # Second fit on different data, using the same pipeline object.
        model.fit([0,1,2.5,3], [2,5,8,11])
        second_summary = model.summary()

        summaries_match = first_summary.equals(second_summary)
        self.assertFalse(summaries_match)
 def test_input_types(self):
     """Fit and predict on ``.values`` of a frame that contains NaNs.

     The 'Score' column of the prediction is compared against reference
     values with a relative tolerance of 1e-4.
     """
     frame = DataFrame(data={
         'Label': [1, 2, 3, 4, 5],
         'f': [1.1, 2.2, 3.3, np.nan, 5.5],
         'f1': [2.2, np.nan, 4.4, 5.5, 6.6],
     })
     feature_names = ['f', 'f1']

     handler = Handler(replace_with='Mean')
     regressor = FastLinearRegressor(shuffle=False, number_of_threads=1)
     model = Pipeline([handler, regressor])

     # Both fit and predict receive the feature columns via `.values`.
     model.fit(frame[feature_names].values, frame['Label'])
     scores = model.predict(frame[feature_names].values)
     print(scores)
     print(model.summary())

     expected = [4.965541, 0.519701, 4.992831, 3.877400, 5.020121]
     assert_allclose(scores['Score'].values, expected, rtol=1e-4)
Пример #4
0
 def test_model_summary_not_supported_specific(self):
     """Check that summary() raises TypeError with the expected message.

     A pipeline ending in FactorizationMachineBinaryClassifier is fitted on
     the 'infert' dataset; calling ``summary()`` on it must raise a
     TypeError whose first argument is the "do not support" message.
     """
     path = get_dataset('infert').as_filepath()
     data = FileDataStream.read_csv(path,
                                    sep=',',
                                    names={
                                        0: 'row_num',
                                        5: 'case'
                                    })
     pipeline = Pipeline([
         OneHotVectorizer(columns={'edu': 'education'}),
         FactorizationMachineBinaryClassifier(
             feature=['induced', 'edu', 'parity'], label='case')
     ])
     pipeline.fit(data)

     # assertRaises fails the test when nothing is raised. Unlike a bare
     # `assert False`, it is not removed when Python runs with -O.
     with self.assertRaises(TypeError) as caught:
         pipeline.summary()
     self.assertEqual(
         caught.exception.args[0],
         "One or more predictors in this pipeline do not support the .summary() function."
     )
Пример #5
0
import numpy as np
import pandas as pd
from nimbusml import Pipeline, FileDataStream
from nimbusml.linear_model import FastLinearRegressor
from nimbusml.preprocessing.normalization import MeanVarianceScaler

# Three training rows with two features each, plus one target per row.
X = np.array([[1, 2.0], [2, 4], [3, 0.7]])
Y = np.array([2, 3, 1.5])

# The same data as a DataFrame with columns y, x1, x2; written to CSV below.
df = pd.DataFrame(dict(y=Y, x1=X[:, 0], x2=X[:, 1]))

pipe = Pipeline([
    MeanVarianceScaler(),
    FastLinearRegressor()
])

# Fit with numpy arrays
pipe.fit(X, Y)

# Fit with FileDataStream
df.to_csv('data.csv', index=False)
ds = FileDataStream.read_csv('data.csv', sep=',')

# A new pipeline is built for the file-based fit; 'y' names the label column.
pipe = Pipeline([
    MeanVarianceScaler(),
    FastLinearRegressor()
])
pipe.fit(ds, 'y')
print(pipe.summary())
#       Bias  Weights.x1  Weights.x2
# 0  1.032946    0.111758    1.210791