Example #1
    def test_automl_usecase(self):
        # train featurization pipeline
        featurization_pipe = Pipeline([NGramFeaturizer(keep_diacritics=True, columns={'Features': ['SentimentText']})])
        featurization_pipe.fit(train_set)

        # train learner pipeline
        learner_pipe = Pipeline([
            DatasetTransformer(featurization_pipe.model),
            OneVsRestClassifier(AveragedPerceptronBinaryClassifier(),
                                feature=['Features'], label='Sentiment')
        ])
        learner_pipe.fit(train_set)

        # Export the learner pipeline to ONNX
        onnx_path = get_tmp_file('.onnx')
        learner_pipe.export_to_onnx(onnx_path, 'com.microsoft.ml', onnx_version='Stable')

        # Perform the transform using the standard ML.Net backend
        start = time.time()
        result_standard = learner_pipe.predict(test_set)
        end = time.time()
        print('%ss done transform using standard backend' % round(end - start, 3))

        # Perform the transform using the ORT backend
        df_tool = DFT(onnx_path)
        dataset = test_set.to_df()
        start = time.time()
        result_ort = df_tool.execute(dataset, ['PredictedLabel.output', 'Score.output'])
        end = time.time()
        print('%ss done transform using ORT backend (excludes df load time)' % round(end - start, 3))

        # compare the results
        for col_tuple in (('PredictedLabel', 'PredictedLabel.output'), 
                          ('Score.0', 'Score.output.0'),
                          ('Score.1', 'Score.output.1'),
                          ):
            col_expected = result_standard.loc[:, col_tuple[0]]
            col_ort = result_ort.loc[:, col_tuple[1]]

            check_kwargs = {
                'check_names': False,
                'check_exact': False,
                'check_dtype': True,
                'check_less_precise': True
            }

            pd.testing.assert_series_equal(col_expected, col_ort, **check_kwargs)
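
The snippet above assumes imports and objects defined elsewhere in the test module: train_set and test_set are nimbusml FileDataStreams over a sentiment dataset, while get_tmp_file (temporary file path helper) and DFT (a DataFrameTool wrapper that executes an ONNX model through ONNX Runtime) are test utilities not reproduced here. A minimal sketch of the assumed setup, with hypothetical data paths:

import time

import pandas as pd

from nimbusml import Pipeline, FileDataStream
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.preprocessing import DatasetTransformer
from nimbusml.multiclass import OneVsRestClassifier
from nimbusml.linear_model import AveragedPerceptronBinaryClassifier

# Hypothetical paths; the real test loads a sentiment dataset whose schema
# includes 'SentimentText' and 'Sentiment' columns.
train_set = FileDataStream.read_csv('train_sentiment.tsv', sep='\t')
test_set = FileDataStream.read_csv('test_sentiment.tsv', sep='\t')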
Example #2
train_data = {'c1': x, 'c2': y}  # x, y: lists of numeric training values defined earlier (not shown here)
train_df = pd.DataFrame(train_data).astype({
    'c1': np.float32,
    'c2': np.float32
})

test_data = {'c1': [2.5, 30.5], 'c2': [1, 1]}
test_df = pd.DataFrame(test_data).astype({'c1': np.float32, 'c2': np.float32})

# Fit a MinMaxScaler Pipeline
r1 = Pipeline([MinMaxScaler()])
r1.fit(train_df)

# Export the pipeline to ONNX
onnx_path = get_tmp_file('.onnx')
r1.export_to_onnx(onnx_path, 'com.microsoft.ml', onnx_version='Stable')

# Perform the transform using the standard ML.Net backend
result_standard = r1.transform(test_df)
print(result_standard)
#          c1        c2
# 0  0.025025  0.000998
# 1  0.305305  0.000998

# Perform the transform using the ONNX backend.
# Note: the extra columns and column-name differences
# are a known issue with the ML.Net backend.
onnxrunner = OnnxRunner(model_file=onnx_path)
result_onnx = onnxrunner.fit_transform(test_df)
print(result_onnx)
#      c1   c2     c12.0     c22.0
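
For reference, the exported scaler can also be run with onnxruntime directly. A minimal sketch, assuming each graph input corresponds to a test_df column of the same base name (the actual input and output names vary with the exported graph, so they are read from the session rather than hard-coded):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(onnx_path)

# Build the feed by matching graph inputs to DataFrame columns;
# ML.Net-exported graphs typically expect one [N, 1] float tensor per column.
feed = {}
for inp in sess.get_inputs():
    col = inp.name.split('.')[0]
    if col in test_df.columns:
        feed[inp.name] = test_df[col].to_numpy(dtype=np.float32).reshape(-1, 1)

outputs = sess.run(None, feed)
for meta, value in zip(sess.get_outputs(), outputs):
    print(meta.name, value.ravel())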