def test_automl_usecase(self):
    """Check that an exported ONNX model scores like the in-process pipeline.

    A featurization pipeline is fitted on its own, then wrapped in a
    ``DatasetTransformer`` placed in front of a one-vs-rest averaged
    perceptron learner.  The learner pipeline is exported to ONNX and
    ``test_set`` is scored twice: once with ``learner_pipe.predict`` and
    once by executing the exported model through ``DFT``.  The predicted
    label and both score columns from the two runs must agree within
    tolerance.
    """
    # train featurization pipeline
    featurization_pipe = Pipeline([
        NGramFeaturizer(keep_diacritics=True,
                        columns={'Features': ['SentimentText']}),
    ])
    featurization_pipe.fit(train_set)

    # train learner pipeline
    learner_pipe = Pipeline([
        DatasetTransformer(featurization_pipe.model),
        OneVsRestClassifier(AveragedPerceptronBinaryClassifier(),
                            feature=['Features'],
                            label='Sentiment'),
    ])
    learner_pipe.fit(train_set)

    # Export the learner pipeline to ONNX
    onnx_path = get_tmp_file('.onnx')
    learner_pipe.export_to_onnx(onnx_path,
                                'com.microsoft.ml',
                                onnx_version='Stable')

    # Perform the transform using the standard ML.Net backend
    start = time.time()
    result_standard = learner_pipe.predict(test_set)
    end = time.time()
    print('%ss done transform using standard backend' %
          round(end - start, 3))

    # Perform the transform using the ORT backend
    df_tool = DFT(onnx_path)
    # The conversion to a DataFrame happens before the timer starts, so
    # the reported ORT time excludes the load cost.
    dataset = test_set.to_df()
    start = time.time()
    result_ort = df_tool.execute(dataset,
                                 ['PredictedLabel.output', 'Score.output'])
    end = time.time()
    print('%ss done transform using ORT backend (excludes df load time)' %
          round(end - start, 3))

    # The comparison options do not depend on the column, so build them
    # once instead of on every loop iteration.
    check_kwargs = {
        'check_names': False,
        'check_exact': False,
        'check_dtype': True,
    }
    # ``check_less_precise`` was deprecated in pandas 1.1 and removed in
    # pandas 2.0, where passing it raises TypeError.  From 1.1 onwards
    # ``check_less_precise=True`` was translated to rtol = atol = 0.5e-3,
    # so pass that tolerance explicitly when the keywords are available
    # and keep the legacy flag on older releases.
    pandas_version = tuple(
        int(part) for part in pd.__version__.split('.')[:2])
    if pandas_version >= (1, 1):
        check_kwargs['rtol'] = 0.5e-3
        check_kwargs['atol'] = 0.5e-3
    else:
        check_kwargs['check_less_precise'] = True

    # compare the results: (standard backend column, ORT backend column)
    column_pairs = (
        ('PredictedLabel', 'PredictedLabel.output'),
        ('Score.0', 'Score.output.0'),
        ('Score.1', 'Score.output.1'),
    )
    for expected_name, ort_name in column_pairs:
        col_expected = result_standard.loc[:, expected_name]
        col_ort = result_ort.loc[:, ort_name]
        pd.testing.assert_series_equal(col_expected,
                                       col_ort,
                                       **check_kwargs)
# Both frames use the same two float32 columns, so describe the dtypes once.
float32_schema = {'c1': np.float32, 'c2': np.float32}

# Training frame built from the x and y values prepared earlier in the script.
train_data = {'c1': x, 'c2': y}
train_df = pd.DataFrame(train_data).astype(float32_schema)

# Small two-row frame used to compare the two backends.
test_data = {'c1': [2.5, 30.5], 'c2': [1, 1]}
test_df = pd.DataFrame(test_data).astype(float32_schema)

# Fit a pipeline that contains only a MinMaxScaler.
r1 = Pipeline([MinMaxScaler()])
r1.fit(train_df)

# Export the fitted pipeline to an ONNX file at a temporary path.
onnx_path = get_tmp_file('.onnx')
r1.export_to_onnx(onnx_path, 'com.microsoft.ml', onnx_version='Stable')

# Transform the test frame with the standard ML.Net backend.
result_standard = r1.transform(test_df)
print(result_standard)
#          c1        c2
# 0  0.025025  0.000998
# 1  0.305305  0.000998

# Transform the same frame with the ONNX backend.
# Note: the extra columns and the differing column names
# are a known issue with the ML.Net backend.
onnxrunner = OnnxRunner(model_file=onnx_path)
result_onnx = onnxrunner.fit_transform(test_df)
print(result_onnx)
#      c1   c2     c12.0     c22.0