def test_PcaAnomalyDetector(self):
    """Anomaly scores on the iris split must sum to a known value."""
    # Fix the RNG so the train/test split (and the score sum) is
    # reproducible across runs.
    np.random.seed(0)

    # Load iris and discard the non-numeric species column.
    data = get_dataset("iris").as_df()
    data.drop(['Species'], inplace=True, axis=1)

    # Features are every column except 'Label'; 'Label' is the target.
    train_X, test_X, train_y, test_y = train_test_split(
        data.loc[:, data.columns != 'Label'], data['Label'])

    detector = PcaAnomalyDetector(rank=3)
    detector.fit(train_X, train_y)
    predictions = detector.predict(test_X)

    # Pin the summed anomaly scores to the known-good regression value.
    assert_almost_equal(
        predictions.sum().sum(),
        4.181632,
        decimal=7,
        err_msg="Sum should be %s" % 4.181632)
# NOTE(review): fragment of a larger dict literal (opened before this chunk)
# mapping estimator names to configured instances — OneVsRestClassifier
# wrappers, PCA components, image loaders/resizers, row filters, and SSA
# time-series detectors. The trailing SsaChangePointDetector(...) call is
# completed outside this view; code below is intentionally unmodified.
OneVsRestClassifier(AveragedPerceptronBinaryClassifier(), use_probabilities=True, feature=['age', 'education_str.0-5yrs', 'education_str.6-11yrs', 'education_str.12+ yrs'], label='induced'), 'OneVsRestClassifier(LinearSvmBinaryClassifier)': \ OneVsRestClassifier(LinearSvmBinaryClassifier(), use_probabilities=True, feature=['age', 'education_str.0-5yrs', 'education_str.6-11yrs', 'education_str.12+ yrs'], label='induced'), 'PcaAnomalyDetector': PcaAnomalyDetector(rank=3), 'PcaTransformer': PcaTransformer(rank=2), 'PixelExtractor': Pipeline([ Loader(columns={'ImgPath': 'Path'}), PixelExtractor(columns={'ImgPixels': 'ImgPath'}), ]), 'PrefixColumnConcatenator': PrefixColumnConcatenator(columns={'Features': 'Sepal_'}), 'Resizer': Pipeline([ Loader(columns={'ImgPath': 'Path'}), Resizer(image_width=227, image_height=227, columns={'ImgResize': 'ImgPath'}) ]), 'SkipFilter': SkipFilter(count=5), 'SsaSpikeDetector': SsaSpikeDetector(columns=['Sepal_Length'], seasonal_window_size=2), 'SsaChangePointDetector': SsaChangePointDetector(columns=['Sepal_Length'],
# NOTE(review): fragment — the first entries close a learners list opened
# before this chunk; `learners_not_supported` enumerates estimators whose
# summary support is pending upstream (per the inline "fix in nimbusml"
# notes); the TestModelSummary class body continues past this view.
# Code below is intentionally unmodified.
LogisticRegressionClassifier(), OnlineGradientDescentRegressor(), SgdBinaryClassifier(), # SymSgdBinaryClassifier(), OrdinaryLeastSquaresRegressor(), PoissonRegressionRegressor() ] learners_not_supported = [ NaiveBayesClassifier(), # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView KMeansPlusPlus(), # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView FactorizationMachineBinaryClassifier(), PcaAnomalyDetector(), # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView # PcaTransformer(), # REVIEW: crashes GamBinaryClassifier(), # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView GamRegressor( ), # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView LightGbmClassifier(), # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView # LightGbmRanker(), # REVIEW: crashes # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView OneVsRestClassifier(FastLinearBinaryClassifier()), ] class TestModelSummary(unittest.TestCase):
# Load the 'infert' dataset from its CSV file as a FileDataStream.
csv_path = get_dataset('infert').as_filepath()
stream = FileDataStream.read_csv(csv_path)
print(stream.head())
#    age  case education  induced  parity ... row_num  spontaneous ...
# 0   26     1    0-5yrs        1       6 ...       1            2 ...
# 1   42     1    0-5yrs        1       1 ...       2            0 ...
# 2   39     1    0-5yrs        2       6 ...       3            0 ...
# 3   34     1    0-5yrs        2       4 ...       4            0 ...
# 4   35     1   6-11yrs        1       3 ...       5            1 ...

# Training pipeline: one-hot encode 'education' into 'edu', then fit a
# rank-3 PCA anomaly detector on the selected feature columns.
anomaly_pipeline = Pipeline([
    OneHotVectorizer(columns={'edu': 'education'}),
    PcaAnomalyDetector(rank=3, feature=['induced', 'edu'])
])

# Fit on the stream, then evaluate against 'case', keeping per-row scores.
metrics, predictions = anomaly_pipeline.fit(stream).test(
    stream, 'case', output_scores=True)

# Inspect the per-row anomaly scores.
#       Score
# 0  0.026155
# 1  0.026155
# 2  0.018055
# 3  0.018055
# 4  0.004043
print(predictions.head())
from sklearn.model_selection import train_test_split

# Use the 'iris' dataset, keeping only the four numeric measurements.
#    Sepal_Length  Sepal_Width  Petal_Length  Petal_Width  Label Species  Setosa
# 0           5.1          3.5           1.4          0.2      0  setosa     1.0
# 1           4.9          3.0           1.4          0.2      0  setosa     1.0
iris = get_dataset("iris").as_df()
iris.drop(['Label', 'Setosa', 'Species'], axis=1, inplace=True)
train_data, test_data = train_test_split(iris)

# One-class training: scikit-style APIs want a y column, and for anomaly
# detection every training label is the same.
y_train = np.ones(len(train_data))

model = PcaAnomalyDetector(rank=3)
model.fit(train_data)

# Append two clearly non-iris observations so the detector has genuine
# anomalies to score at the end of the test set.
outliers = pandas.DataFrame(data=dict(
    Sepal_Length=[2.5, 2.6],
    Sepal_Width=[.75, .9],
    Petal_Length=[2.5, 2.5],
    Petal_Width=[.8, .7]))
combined_test = pandas.concat([test_data, outliers], sort=False)

predictions = model.predict(combined_test)

# The appended outliers are the last rows of the output.
print(predictions.tail())