def test_publish_and_execute(tmpdir):
    """Publish a pickled linear regression as a MAS module and score one row.

    The test is skipped when scikit-learn or pandas cannot be imported.
    A ``LinearRegression`` is fit on the data returned by ``load_boston``,
    pickled, converted to DS2 score code with ``from_pickle`` and published
    as the module ``sasctl_test``.  The first training row is then scored
    through the ``score`` step and the returned ``rc``, ``var1`` and ``msg``
    values are checked.
    """
    import pickle
    from sasctl.utils.pymas import from_pickle
    from sasctl.services import microanalytic_score as mas

    # Optional dependencies: importorskip() skips the test if they are missing.
    sklearn = pytest.importorskip('sklearn')
    from sklearn import datasets
    from sklearn.linear_model import LinearRegression
    pd = pytest.importorskip('pandas')

    boston = datasets.load_boston()
    features = pd.DataFrame(boston.data, columns=boston.feature_names)
    target = pd.DataFrame(boston.target, columns=['Price'])

    model = LinearRegression()
    model.fit(features, target)

    pymas = from_pickle(pickle.dumps(model), 'predict', features,
                        array_input=True)
    score_code = pymas.score_code()
    mas.create_module('sasctl_test', source=score_code, language='ds2')

    # The step is called with lower-cased column names as keyword arguments.
    first_row = features.iloc[0, :]
    step_inputs = {name.lower(): value for name, value in first_row.items()}
    result = mas.execute_module_step('sasctl_test', 'score', **step_inputs)

    assert result['rc'] == 0
    assert result['var1'] == 24
    assert result['msg'] is None
def test_with_sklearn_pipeline(train_data, sklearn_pipeline):
    """Build a PyMAS instance from a pickled scikit-learn pipeline.

    The pipeline fixture is pickled and handed to ``from_pickle`` together
    with the training inputs; the result must be a ``PyMAS`` object that
    exposes more than four variables.
    """
    from sasctl.utils.pymas import PyMAS, from_pickle

    features, _ = train_data
    pickled_pipeline = pickle.dumps(sklearn_pipeline)

    pymas = from_pickle(pickled_pipeline,
                        func_name='predict',
                        input_types=features)

    assert isinstance(pymas, PyMAS)

    # 4 input features in Iris data set
    assert len(pymas.variables) > 4
def test_from_pickle_with_class():
    """Create a PyMAS instance from a pickled class instance.

    Calling ``from_pickle`` without a function name must raise
    ``ValueError``.  With a function name, the (mocked) ``PyMAS`` class must
    be constructed exactly once and receive the expected DS2 variables as its
    second positional argument.
    """
    import pickle
    from sasctl.utils.pymas import from_pickle

    pickled_obj = pickle.dumps(DummyClass())

    # No function specified
    with pytest.raises(ValueError):
        from_pickle(pickled_obj)

    with mock.patch('sasctl.utils.pymas.core.PyMAS',
                    autospec=True) as pymas_cls:
        instance = from_pickle(pickled_obj, 'func')

    assert pymas_cls.call_count == 1

    expected_variables = [
        DS2Variable('x1', 'str', False),
        DS2Variable('x2', 'int', False),
        DS2Variable('out1', 'float', True),
    ]
    # Index 0 of call_args holds the positional arguments; item 1 of those
    # is the variable list.
    assert expected_variables == pymas_cls.call_args[0][1]

    assert isinstance(instance, PyMAS)
def test_from_pickle_with_func():
    """Create a PyMAS instance from a pickled function.

    No function name is passed to ``from_pickle`` here.  The (mocked)
    ``PyMAS`` class must be constructed exactly once and receive the expected
    DS2 variables as its second positional argument.
    """
    import pickle
    from sasctl.utils.pymas import from_pickle

    pickled_func = pickle.dumps(dummy_func)

    with mock.patch('sasctl.utils.pymas.core.PyMAS',
                    autospec=True) as pymas_cls:
        instance = from_pickle(pickled_func)

    assert pymas_cls.call_count == 1

    expected_variables = [
        DS2Variable('x1', 'str', False),
        DS2Variable('x2', 'int', False),
        DS2Variable('out1', 'float', True),
    ]
    # Index 0 of call_args holds the positional arguments; item 1 of those
    # is the variable list.
    assert expected_variables == pymas_cls.call_args[0][1]

    assert isinstance(instance, PyMAS)
def test_publish_and_execute(tmpdir, boston_dataset):
    """Publish a pickled linear regression to MAS, score one row, clean up.

    A ``LinearRegression`` is fit using every column of ``boston_dataset``
    except the last as inputs and the last column as the target.  The pickled
    model is converted to DS2 score code with ``from_pickle``, published as
    the module ``sasctl_test`` and executed through its ``predict`` step with
    the first input row.

    The module is removed in a ``finally`` clause so that a failed execution
    or assertion does not leave ``sasctl_test`` published on the server.

    Parameters
    ----------
    tmpdir
        pytest temporary-directory fixture (not used by the test body).
    boston_dataset
        Fixture supporting ``.columns`` and column-list indexing
        (presumably a pandas DataFrame -- confirm against the fixture).
    """
    import pickle
    from sasctl.utils.pymas import from_pickle
    from sasctl.services import microanalytic_score as mas
    from sklearn.linear_model import LinearRegression

    X = boston_dataset[boston_dataset.columns[:-1]]
    y = boston_dataset[boston_dataset.columns[-1]]

    lm = LinearRegression()
    lm.fit(X, y)
    pkl = pickle.dumps(lm)
    p = from_pickle(pkl, 'predict', X, array_input=True)

    # Generate the score code & publish as a model
    code = p.score_code()
    mas.create_module('sasctl_test', source=code, language='ds2')

    try:
        # The step is called with lower-cased column names as keyword args.
        x1 = {k.lower(): v for k, v in X.iloc[0, :].items()}
        result = mas.execute_module_step('sasctl_test', 'predict', **x1)

        assert round(result['var1'], 3) == 30.004
    finally:
        # Always remove the published module, even if the checks above fail.
        mas.delete_module('sasctl_test')
def test_from_pickle_stream(train_data, pickle_stream):
    """Build a PyMAS instance from the ``pickle_stream`` fixture.

    Only the type of the object returned by ``from_pickle`` is checked.
    """
    from sasctl.utils.pymas import PyMAS, from_pickle

    features, _ = train_data

    pymas = from_pickle(pickle_stream,
                        func_name='predict',
                        input_types=features)

    assert isinstance(pymas, PyMAS)
def test_from_pickle(train_data, pickle_file):
    """Compare the DS2 score code generated for a single function to a fixture.

    ``from_pickle`` is called on the ``pickle_file`` fixture with
    ``func_name='predict'`` and ``array_input=True``; the score code of the
    resulting ``PyMAS`` object is then compared to the literal ``target``
    after the embedded pickle bytes have been masked out.
    """
    import re
    from sasctl.utils.pymas import PyMAS, from_pickle

    X, y = train_data

    # Fix the value returned by uuid4().hex; the same hex prefix appears in
    # the package name of the expected code below.
    with mock.patch('uuid.uuid4') as mocked:
        mocked.return_value.hex = 'DF74A4B18C9E41A2A34B0053E123AA67'
        p = from_pickle(pickle_file, func_name='predict', input_types=X, array_input=True)

    # NOTE(review): the whitespace inside this expected literal looks like it
    # has been collapsed (no line breaks or indentation between the DS2
    # statements) -- confirm against the generator's real output, since the
    # comparison at the end of the test is exact.
    target = """ package _DF74A4B18C9E41A2A34B0053E123AA6 / overwrite=yes; dcl package pymas py; dcl package logger logr('App.tk.MAS'); dcl varchar(67108864) character set utf8 pycode; dcl int revision; method score( double SepalLength, double SepalWidth, double PetalLength, double PetalWidth, in_out char var1, in_out integer rc, in_out char msg ); if null(py) then do; py = _new_ pymas(); rc = py.useModule('mypymodule', 1); if rc then do; rc = py.appendSrcLine('try:'); rc = py.appendSrcLine(' import pickle, base64'); rc = py.appendSrcLine(' bytes = "X"'); rc = py.appendSrcLine(' obj = pickle.loads(base64.b64decode(bytes))'); rc = py.appendSrcLine(' _compile_error = None'); rc = py.appendSrcLine('except Exception as e:'); rc = py.appendSrcLine(' _compile_error = e'); rc = py.appendSrcLine(''); rc = py.appendSrcLine('def wrapper(SepalLength, SepalWidth, PetalLength, PetalWidth):'); rc = py.appendSrcLine(' "Output: var1, msg"'); rc = py.appendSrcLine(' result = None'); rc = py.appendSrcLine(' try:'); rc = py.appendSrcLine(' global _compile_error'); rc = py.appendSrcLine(' if _compile_error is not None:'); rc = py.appendSrcLine(' raise _compile_error'); rc = py.appendSrcLine(' msg = ""'); rc = py.appendSrcLine(' import numpy as np'); rc = py.appendSrcLine(' result = obj.predict(np.array([SepalLength,SepalWidth,PetalLength,PetalWidth]).reshape((1, -1)))'); rc = py.appendSrcLine(' if result.size == 1:'); rc = py.appendSrcLine(' result = np.asscalar(result)'); rc = py.appendSrcLine(' except Exception as e:'); rc = py.appendSrcLine(' msg = str(e)'); rc = py.appendSrcLine(' if result is None:'); rc = py.appendSrcLine(' result = tuple(None for i in range(1))'); rc = 
py.appendSrcLine(' if isinstance(result, tuple):'); rc = py.appendSrcLine(' return tuple(x for x in list(result) + [msg])'); rc = py.appendSrcLine(' else: '); rc = py.appendSrcLine(' return result, msg'); pycode = py.getSource(); revision = py.publish(pycode, 'mypymodule'); if revision lt 1 then do; logr.log('e', 'py.publish() failed.'); rc = -1; return; end; end; rc = py.useMethod('wrapper'); if rc then return; end; rc = py.setDouble('SepalLength', SepalLength); if rc then return; rc = py.setDouble('SepalWidth', SepalWidth); if rc then return; rc = py.setDouble('PetalLength', PetalLength); if rc then return; rc = py.setDouble('PetalWidth', PetalWidth); if rc then return; rc = py.execute(); if rc then return; var1 = py.getString('var1'); msg = py.getString('msg'); end; endpackage; """

    assert isinstance(p, PyMAS)

    result = p.score_code()

    # Ignore byte string during comparison.  Pickle seems to change with
    # time / Python versions, so everything after 'bytes = ' on that line is
    # replaced with a fixed placeholder.
    result = re.sub('bytes = .*', 'bytes = "X"\');', result)

    # Leading newlines are stripped from the expected text before comparing
    # (they come from how the multiline literal is laid out).
    assert result == target.lstrip('\n')
def test_from_pickle_2(train_data, pickle_file):
    """Compare the DS2 score code generated for two functions to a fixture.

    ``from_pickle`` is called on the ``pickle_file`` fixture with
    ``func_name=['predict', 'predict_proba']`` and ``array_input=True``; the
    score code of the resulting ``PyMAS`` object is then compared to the
    literal ``target`` after the embedded pickle bytes have been masked out.
    """
    import re
    from sasctl.utils.pymas import PyMAS, from_pickle

    X, _ = train_data

    # Fix the value returned by uuid4().hex; the same hex string appears in
    # the package and module names of the expected code below.
    with mock.patch('uuid.uuid4') as mocked:
        mocked.return_value.hex = 'DF74A4B18C9E41A2A34B0053E123AA67'
        p = from_pickle(pickle_file, func_name=['predict', 'predict_proba'], input_types=X, array_input=True)

    # NOTE(review): the whitespace inside this expected literal looks like it
    # has been collapsed (no line breaks or indentation between the DS2
    # statements) -- confirm against the generator's real output, since the
    # comparison at the end of the test is exact.
    # Leading newlines are stripped here (they come from how the multiline
    # literal is laid out).
    target = """ package _DF74A4B18C9E41A2A34B0053E123AA6 / overwrite=yes; dcl package pymas py; dcl package logger logr('App.tk.MAS'); dcl varchar(67108864) character set utf8 pycode; dcl int revision; method init(); dcl integer rc; if null(py) then do; py = _new_ pymas(); rc = py.useModule('DF74A4B18C9E41A2A34B0053E123AA67', 1); if rc then do; rc = py.appendSrcLine('try:'); rc = py.appendSrcLine(' import pickle, base64'); rc = py.appendSrcLine(' bytes = b"X"'); rc = py.appendSrcLine(' obj = pickle.loads(base64.b64decode(bytes))'); rc = py.appendSrcLine(' _compile_error = None'); rc = py.appendSrcLine('except Exception as e:'); rc = py.appendSrcLine(' _compile_error = e'); rc = py.appendSrcLine(''); rc = py.appendSrcLine('def predict(SepalLength, SepalWidth, PetalLength, PetalWidth):'); rc = py.appendSrcLine(' "Output: var1, msg"'); rc = py.appendSrcLine(' result = None'); rc = py.appendSrcLine(' msg = None'); rc = py.appendSrcLine(' try:'); rc = py.appendSrcLine(' global _compile_error'); rc = py.appendSrcLine(' if _compile_error is not None:'); rc = py.appendSrcLine(' raise _compile_error'); rc = py.appendSrcLine(' import numpy as np'); rc = py.appendSrcLine(' import pandas as pd'); rc = py.appendSrcLine(''); rc = py.appendSrcLine(' if SepalLength is None: SepalLength = np.nan'); rc = py.appendSrcLine(' if SepalWidth is None: SepalWidth = np.nan'); rc = py.appendSrcLine(' if PetalLength is None: PetalLength = np.nan'); rc = py.appendSrcLine(' if PetalWidth is None: PetalWidth = np.nan'); rc = py.appendSrcLine(' input_array = np.array([SepalLength, SepalWidth, PetalLength, PetalWidth]).reshape((1, -1))'); rc = py.appendSrcLine(' columns = 
["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]'); rc = py.appendSrcLine(' input_df = pd.DataFrame(data=input_array, columns=columns)'); rc = py.appendSrcLine(' result = obj.predict(input_df)'); rc = py.appendSrcLine(' result = tuple(result.ravel()) if hasattr(result, "ravel") else tuple(result)'); rc = py.appendSrcLine(' if len(result) == 0:'); rc = py.appendSrcLine(' result = tuple(None for i in range(1))'); rc = py.appendSrcLine(' elif "numpy" in str(type(result[0])):'); rc = py.appendSrcLine(' result = tuple(np.asscalar(i) for i in result)'); rc = py.appendSrcLine(' except Exception as e:'); rc = py.appendSrcLine(' from traceback import format_exc'); rc = py.appendSrcLine(' msg = str(e) + format_exc()'); rc = py.appendSrcLine(' if result is None:'); rc = py.appendSrcLine(' result = tuple(None for i in range(1))'); rc = py.appendSrcLine(' return result + (msg, )'); rc = py.appendSrcLine(''); rc = py.appendSrcLine('def predict_proba(SepalLength, SepalWidth, PetalLength, PetalWidth):'); rc = py.appendSrcLine(' "Output: P_1, P_2, P_3, msg"'); rc = py.appendSrcLine(' result = None'); rc = py.appendSrcLine(' msg = None'); rc = py.appendSrcLine(' try:'); rc = py.appendSrcLine(' global _compile_error'); rc = py.appendSrcLine(' if _compile_error is not None:'); rc = py.appendSrcLine(' raise _compile_error'); rc = py.appendSrcLine(' import numpy as np'); rc = py.appendSrcLine(' import pandas as pd'); rc = py.appendSrcLine(''); rc = py.appendSrcLine(' if SepalLength is None: SepalLength = np.nan'); rc = py.appendSrcLine(' if SepalWidth is None: SepalWidth = np.nan'); rc = py.appendSrcLine(' if PetalLength is None: PetalLength = np.nan'); rc = py.appendSrcLine(' if PetalWidth is None: PetalWidth = np.nan'); rc = py.appendSrcLine(' input_array = np.array([SepalLength, SepalWidth, PetalLength, PetalWidth]).reshape((1, -1))'); rc = py.appendSrcLine(' columns = ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]'); rc = py.appendSrcLine(' input_df = 
pd.DataFrame(data=input_array, columns=columns)'); rc = py.appendSrcLine(' result = obj.predict_proba(input_df)'); rc = py.appendSrcLine(' result = tuple(result.ravel()) if hasattr(result, "ravel") else tuple(result)'); rc = py.appendSrcLine(' if len(result) == 0:'); rc = py.appendSrcLine(' result = tuple(None for i in range(3))'); rc = py.appendSrcLine(' elif "numpy" in str(type(result[0])):'); rc = py.appendSrcLine(' result = tuple(np.asscalar(i) for i in result)'); rc = py.appendSrcLine(' except Exception as e:'); rc = py.appendSrcLine(' from traceback import format_exc'); rc = py.appendSrcLine(' msg = str(e) + format_exc()'); rc = py.appendSrcLine(' if result is None:'); rc = py.appendSrcLine(' result = tuple(None for i in range(3))'); rc = py.appendSrcLine(' return result + (msg, )'); pycode = py.getSource(); revision = py.publish(pycode, 'DF74A4B18C9E41A2A34B0053E123AA67'); if revision lt 1 then do; logr.log('e', 'py.publish() failed.'); rc = -1; end; end; end; end; method predict( double SepalLength, double SepalWidth, double PetalLength, double PetalWidth, in_out char var1 ); dcl integer rc; dcl varchar(4068) msg; rc = py.useMethod('predict'); if rc then return; rc = py.setDouble('SepalLength', SepalLength); if rc then return; rc = py.setDouble('SepalWidth', SepalWidth); if rc then return; rc = py.setDouble('PetalLength', PetalLength); if rc then return; rc = py.setDouble('PetalWidth', PetalWidth); if rc then return; rc = py.execute(); if rc then return; var1 = py.getString('var1'); msg = py.getString('msg'); if not null(msg) then logr.log('e', 'Error executing Python function "predict": $s', msg); end; method predict_proba( double SepalLength, double SepalWidth, double PetalLength, double PetalWidth, in_out double P_1, in_out double P_2, in_out double P_3 ); dcl integer rc; dcl varchar(4068) msg; rc = py.useMethod('predict_proba'); if rc then return; rc = py.setDouble('SepalLength', SepalLength); if rc then return; rc = py.setDouble('SepalWidth', 
SepalWidth); if rc then return; rc = py.setDouble('PetalLength', PetalLength); if rc then return; rc = py.setDouble('PetalWidth', PetalWidth); if rc then return; rc = py.execute(); if rc then return; P_1 = py.getDouble('P_1'); P_2 = py.getDouble('P_2'); P_3 = py.getDouble('P_3'); msg = py.getString('msg'); if not null(msg) then logr.log('e', 'Error executing Python function "predict_proba": $s', msg); end; endpackage; """.lstrip('\n')

    assert isinstance(p, PyMAS)

    result = p.score_code()

    # Ignore byte string during comparison.  Pickle seems to change with
    # time / Python versions, so everything after 'bytes = ' on that line is
    # replaced with a fixed placeholder.
    result = re.sub('bytes = .*', 'bytes = b"X"\');', result)
    assert result == target