def build_iforest_housing_anomaly(iforest, name):
    """Fit an anomaly-detection pipeline on the housing features and store its outputs.

    The pipeline consists of a ``DataFrameMapper`` that applies a
    ``ContinuousDomain`` to every column of the module-level ``housing_X``
    frame, followed by the supplied estimator. After fitting, the pipeline is
    handed to ``store_pkl`` and a two-column result table is handed to
    ``store_csv``.

    Args:
        iforest: Estimator used as the final ``"estimator"`` step. The
            pipeline's ``fit``, ``decision_function`` and ``predict`` are
            called, so the estimator presumably has to support all three
            (an isolation forest, judging by the parameter name).
        name: Base name for the outputs; ``".pkl"`` and ``".csv"`` are
            appended to it.
    """
    feature_mapper = DataFrameMapper([
        (housing_X.columns.values, ContinuousDomain()),
    ])
    steps = [
        ("mapper", feature_mapper),
        ("estimator", iforest),
    ]
    pipeline = PMMLPipeline(steps)
    pipeline.fit(housing_X)
    store_pkl(pipeline, name + ".pkl")

    scores = DataFrame(
        pipeline.decision_function(housing_X),
        columns=["decisionFunction"],
    )

    # A prediction of exactly -1 marks the row as an outlier.
    is_outlier = pipeline.predict(housing_X) == -1
    flags = DataFrame(is_outlier, columns=["outlier"])
    # Booleans are written out as the lowercase strings "true" / "false".
    flags = flags.replace(True, "true")
    flags = flags.replace(False, "false")

    results = pandas.concat([scores, flags], axis=1)
    store_csv(results, name + ".csv")
def build_svm_housing_anomaly(svm, name):
    """Fit a scaled anomaly-detection pipeline on the housing features and store its outputs.

    The pipeline consists of a ``DataFrameMapper`` that applies a
    ``ContinuousDomain`` to all but the last entry of the module-level
    ``housing_columns``, followed by a nested ``Pipeline`` that runs a
    ``MaxAbsScaler`` and then the supplied estimator. After fitting on
    ``housing_X``, the pipeline is handed to ``store_pkl`` and a two-column
    result table is handed to ``store_csv``.

    Args:
        svm: Estimator used as the ``"second"`` step of the nested pipeline.
            The outer pipeline's ``fit``, ``decision_function`` and
            ``predict`` are called, so the estimator presumably has to
            support all three (a one-class SVM, judging by the parameter
            name).
        name: Base name for the outputs; ``".pkl"`` and ``".csv"`` are
            appended to it.
    """
    feature_mapper = DataFrameMapper([
        (housing_columns[:-1], ContinuousDomain()),
    ])
    scaled_estimator = Pipeline([
        ("first", MaxAbsScaler()),
        ("second", svm),
    ])
    steps = [
        ("mapper", feature_mapper),
        ("estimator", scaled_estimator),
    ]
    pipeline = PMMLPipeline(steps)
    pipeline.fit(housing_X)
    store_pkl(pipeline, name + ".pkl")

    scores = DataFrame(
        pipeline.decision_function(housing_X),
        columns=["decisionFunction"],
    )

    # Any non-positive prediction marks the row as an outlier.
    is_outlier = pipeline.predict(housing_X) <= 0
    flags = DataFrame(is_outlier, columns=["outlier"])
    # Booleans are written out as the lowercase strings "true" / "false".
    flags = flags.replace(True, "true")
    flags = flags.replace(False, "false")

    results = pandas.concat([scores, flags], axis=1)
    store_csv(results, name + ".csv")