class TestLinearRegressionIntegration(TestCase):
    """Integration tests for ``PMMLLinearRegression`` using stored fixtures.

    The fixtures are a CSV file and a PMML file resolved relative to
    ``BASE_DIR``; reference values are hard-coded in the individual tests.
    """

    def setUp(self):
        """Load the test table and build the model from the PMML file."""
        df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
        # Column 0 is kept apart as ``yte``; the remaining columns are the
        # inputs passed to predict()/score().
        Xte = df.iloc[:, 1:]
        yte = df.iloc[:, 0]
        self.test = (Xte, yte)

        pmml = path.join(BASE_DIR, '../models/linear-regression.pmml')
        self.clf = PMMLLinearRegression(pmml)

    def test_predict(self):
        """Predictions must match the stored reference values within tolerance."""
        Xte, _ = self.test
        ref = np.array([
            0.959047661, 0.306635481, 0.651029985, 0.954874880,
            0.889268596, 0.874413539, 0.552911965, 0.793971929,
            0.567604727, 0.694441984, 0.977588079, 1.020076443,
            0.938209348, 0.741296266, 0.785681506, 0.783314305,
            0.147203243, 0.953499858, 0.861694209, 0.818535888,
            1.054586791, 0.508564304, 0.490740907, 0.692194962,
            0.546339084, 0.584074930, 0.817451147, 0.007120341,
            -0.023103301, 0.354232979, 0.452602313, -0.232817829,
            0.289612034, 0.241502904, 0.098388728, 0.413283786,
            0.349024715, 0.315999598, 0.656973238, 0.525739661,
            0.243258999, 0.128203855, 0.151826018, 0.357043960,
            0.647876971, 0.405659892, 0.264334997, 0.280004394,
            0.948749766, -0.028252457, 0.415301011, 0.509803923,
        ])
        assert np.allclose(ref, self.clf.predict(Xte))

    def test_score(self):
        """The score against the boolean target ``yte == 'Yes'`` must match."""
        Xte, yte = self.test
        ref = 0.409378064635437
        score = self.clf.score(Xte, yte == 'Yes')
        # Compare with a tolerance: the score is a computed float, and exact
        # equality breaks on harmless last-digit differences between
        # platforms or library versions.
        assert np.isclose(ref, score)

    def test_fit_exception(self):
        """Calling ``fit`` must raise with the message 'Not supported.'."""
        with self.assertRaises(Exception) as cm:
            self.clf.fit(np.array([[]]), np.array([]))

        # Checked after the ``with`` block: code following the raising call
        # inside the block would never execute.
        assert str(cm.exception) == 'Not supported.'
def setUp(self):
    """Prepare the fixture table and the model used by the tests.

    Reads ``categorical-test.csv`` and stores ``self.test`` as a pair:
    every column except the first, followed by the first column on its own.
    ``self.clf`` is a ``PMMLLinearRegression`` built from
    ``linear-regression.pmml``. Both paths are resolved against ``BASE_DIR``.
    """
    csv_file = path.join(BASE_DIR, '../models/categorical-test.csv')
    model_file = path.join(BASE_DIR, '../models/linear-regression.pmml')

    frame = pd.read_csv(csv_file)
    # (all-but-first columns, first column)
    self.test = (frame.iloc[:, 1:], frame.iloc[:, 0])

    self.clf = PMMLLinearRegression(model_file)
def get_model(self, model_url):
    """Load a model from ``model_url`` into ``self.model``.

    The loader is chosen from the suffix of ``model_url``:

    * ``pkl``  -> unpickled from the file at that path;
    * ``pmml`` (or the legacy spelling ``pnnm``) -> ``PMMLLinearRegression``.

    A falsy ``model_url`` (``None`` or ``""``) stores the placeholder string
    ``"No Model To Use"``. Any other suffix leaves ``self.model`` untouched.

    Args:
        model_url: Path of the model file, or a falsy value for "no model".
    """
    print("model_url was: {0}".format(model_url))

    if not model_url:
        self.model = "No Model To Use"
        # Stop here: there is nothing to load, and inspecting the suffix of
        # ``None`` would raise TypeError.
        return

    if model_url.endswith("pkl"):
        print("found a pickle")
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only pass paths to pickles that come from a trusted source.
        with open(model_url, "rb") as ff:
            self.model = pickle.load(ff)
    elif model_url.endswith(("pmml", "pnnm")):
        # "pnnm" is kept for backward compatibility with existing callers;
        # "pmml" is the suffix PMML files normally carry.
        self.model = PMMLLinearRegression(model_url)
def test_nonlinear_model(self):
    """Constructing from this document must fail with 'PMML model is not linear.'.

    The document declares a categorical ``Class`` target and a regression
    table with two ``NumericPredictor`` entries for field ``a``.
    """
    # Adjacent literals are joined at compile time; the resulting text is
    # exactly the single-line document, including leading/trailing spaces.
    document = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="Class" optype="categorical" dataType="string">'
        ' <Value value="setosa"/>'
        ' <Value value="versicolor"/>'
        ' <Value value="virginica"/>'
        ' </DataField>'
        ' <DataField name="a" optype="continuous" dataType="double"/>'
        ' </DataDictionary>'
        ' <RegressionModel>'
        ' <MiningSchema>'
        ' <MiningField name="Class" usageType="target"/>'
        ' </MiningSchema>'
        ' <RegressionTable>'
        ' <NumericPredictor name="a" exponent="1" coefficient="1"/>'
        ' <NumericPredictor name="a" exponent="1" coefficient="1"/>'
        ' </RegressionTable>'
        ' </RegressionModel>'
        ' </PMML> '
    )

    with self.assertRaises(Exception) as cm:
        PMMLLinearRegression(pmml=StringIO(document))

    message = str(cm.exception)
    assert message == 'PMML model is not linear.'
def test_invalid_model(self):
    """Constructing from a document without a RegressionModel must fail.

    The document only contains a data dictionary and a mining schema; the
    expected message is 'PMML model does not contain RegressionModel.'.
    """
    # Adjacent literals are joined at compile time; the resulting text is
    # exactly the single-line document, including leading/trailing spaces.
    document = (
        ' <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">'
        ' <DataDictionary>'
        ' <DataField name="Class" optype="categorical" dataType="string">'
        ' <Value value="setosa"/>'
        ' <Value value="versicolor"/>'
        ' <Value value="virginica"/>'
        ' </DataField>'
        ' </DataDictionary>'
        ' <MiningSchema>'
        ' <MiningField name="Class" usageType="target"/>'
        ' </MiningSchema>'
        ' </PMML> '
    )

    with self.assertRaises(Exception) as cm:
        PMMLLinearRegression(pmml=StringIO(document))

    message = str(cm.exception)
    assert message == 'PMML model does not contain RegressionModel.'
class CTModel:
    """Holder for a loaded model plus helpers to score and chart it.

    Typical flow: ``get_model`` -> ``fit_predict_and_score`` -> ``make_graph``.
    """

    def __init__(self):
        # Directory named for downloaded models; only referenced by the
        # not-yet-implemented remote-download path (see TODO in get_model).
        self.TMP_MODEL_HOME = "/tmp"
        # Placeholder string until get_model() installs a real model object.
        self.model = "I wish I was a model"

    def get_model(self, model_url):
        """Load a model from ``model_url`` into ``self.model``.

        The loader is chosen from the suffix of ``model_url``:

        * ``pkl``  -> unpickled from the file at that path;
        * ``pmml`` (or the legacy spelling ``pnnm``) -> ``PMMLLinearRegression``.

        A falsy ``model_url`` (``None`` or ``""``) stores the placeholder
        string ``"No Model To Use"``. Any other suffix leaves ``self.model``
        untouched.

        Args:
            model_url: Path of the model file, or a falsy value for "no model".
        """
        print("model_url was: {0}".format(model_url))

        if not model_url:
            self.model = "No Model To Use"
            # Stop here: there is nothing to load, and inspecting the suffix
            # of ``None`` would raise TypeError.
            return

        if model_url.endswith("pkl"):
            print("found a pickle")
            # SECURITY: unpickling executes arbitrary code embedded in the
            # file. Only pass paths to pickles from a trusted source.
            with open(model_url, "rb") as ff:
                self.model = pickle.load(ff)
        elif model_url.endswith(("pmml", "pnnm")):
            # "pnnm" is kept for backward compatibility with existing
            # callers; "pmml" is the suffix PMML files normally carry.
            self.model = PMMLLinearRegression(model_url)

        # TODO: URLs without an extension were meant to be downloaded into
        # TMP_MODEL_HOME and loaded as an h2o model; not implemented.

    def show_model(self):
        """Return a summary of ``self.model`` based on its ``type`` attribute.

        Returns:
            ``model_performance()`` for type ``"h2o"``, ``do_something()``
            for types ``"pickle"`` and ``"pnnm"``, and ``None`` for any
            other type.

        Raises:
            AttributeError: if ``self.model`` has no ``type`` attribute
                (for example while it is still a placeholder string).
        """
        model_type = self.model.type
        if model_type == "h2o":
            return self.model.model_performance()
        if model_type in ("pickle", "pnnm"):
            return self.model.do_something()
        # Unknown type: report "no summary" rather than failing on an
        # unassigned local variable.
        return None

    def regression_results(self, y_true, y_pred):
        """Compute a dictionary of metrics comparing ``y_pred`` to ``y_true``.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.

        Returns:
            dict mapping the label strings below to metric values; all but
            the AUC are rounded to four decimals.
        """
        explained_variance = metrics.explained_variance_score(y_true, y_pred)
        mean_absolute_error = metrics.mean_absolute_error(y_true, y_pred)
        mse = metrics.mean_squared_error(y_true, y_pred)
        mean_squared_log_error = metrics.mean_squared_log_error(y_true, y_pred)
        r2 = metrics.r2_score(y_true, y_pred)

        # NOTE(review): the ROC curve treats the label value 2 as the
        # positive class -- confirm this matches the targets in use.
        fpr, tpr, _ = metrics.roc_curve(y_true, y_pred, pos_label=2)
        auc = metrics.auc(fpr, tpr)

        return {
            'explained_variance: ': round(explained_variance, 4),
            'AUC: ': auc,
            'R2: ': round(r2, 4),
            'MAE: ': round(mean_absolute_error, 4),
            'MSE: ': round(mse, 4),
            'RMSE: ': round(np.sqrt(mse), 4),
            'mean_squared_log_error: ': round(mean_squared_log_error, 4),
        }

    def fit_predict_and_score(self, X, Y):
        """Fit the model on ``(X, Y)``, predict on ``X`` and score the result.

        The scores are also stored on ``self.scores`` for ``make_graph``.

        Returns:
            The dictionary produced by ``regression_results``.
        """
        self.model.fit(X, Y)
        Y_pred = self.model.predict(X)
        self.scores = self.regression_results(Y, Y_pred)
        return self.scores

    def make_graph(self):
        """Render ``self.scores`` as a filled polar line chart.

        Reads ``self.scores``, which is set by ``fit_predict_and_score``.

        Returns:
            str: the PNG image, base64-encoded and decoded as UTF-8 text.
        """
        df = pd.DataFrame(
            dict(r=list(self.scores.values()),
                 theta=list(self.scores.keys())))
        fig = px.line_polar(df, r='r', theta='theta', line_close=True)
        fig.update_traces(fill='toself')

        figfile = io.BytesIO()
        fig.write_image(figfile, format='png')
        # getvalue() returns the whole buffer regardless of the current
        # position, so no rewind or line-wise join is needed.
        figdata_png = base64.b64encode(figfile.getvalue())
        print("im in the graph_maker")
        return figdata_png.decode('utf8')