def test_ort_gradient_optimizers_use_numpy_nesterov(self):
    """The 'Nesterov' learning-rate scheme is not implemented:
    building the optimizer must raise ``NotImplementedError``."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    # Build a small regression problem and a fitted linear model.
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    # coefficients must be 2D for the ONNX initializers
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    self.assertRaise(
        lambda: OrtGradientOptimizer(
            onx_loss, inits, learning_rate="Nesterov"),
        NotImplementedError)
def test_ort_gradient_optimizers_use_numpy_w_l1(self):
    """Trains with a weighted 'l1' loss (numpy path) and checks the
    optimizer state, repr and recorded losses."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    # per-sample weights in [1, 2)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = LinearRegression()
    reg.fit(X_train, y_train, sample_weight=w_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx, weight_name='weight', score_name='l1')
    inits = ['intercept', 'coef']
    train_session = OrtGradientOptimizer(
        onx_loss, inits, learning_rate=1e-3)
    # state is only available once fit has run
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    train_session.fit(X_train, y_train, w_train, use_numpy=True)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
def test_add_loss_output_cls(self):
    """add_loss_output with the log loss on a classifier must match
    scikit-learn's :func:`log_loss`.

    BUG FIX: the epsilon was written ``eps=1 - 6`` which evaluates to
    ``-5``; the intent (see ``test_add_log_loss``) is the small clipping
    epsilon ``1e-6``. Fixed in both the ONNX loss and the reference.
    """
    from onnxcustom.utils.orttraining_helper import add_loss_output
    X, y = make_classification(  # pylint: disable=W0632
        100, n_features=10)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    reg = LogisticRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearClassifier'},
                  options={'zipmap': False})
    onx_loss = add_loss_output(
        onx, 'log', output_index='probabilities', eps=1e-6)
    try:
        text = onnx_simple_text_plot(onx_loss)
    except RuntimeError:
        text = ""
    if text:
        # the epsilon clipping must appear in the graph
        self.assertIn("Clip(probabilities", text)
    oinf = OnnxInference(onx_loss)
    output = oinf.run({'X': X_test, 'label': y_test.reshape((-1, 1))})
    loss = output['loss']
    skl_loss = log_loss(y_test, reg.predict_proba(X_test), eps=1e-6)
    self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-5)
def test_ort_gradient_optimizers_optimal_use_ort(self):
    """Trains with the 'optimal' learning-rate schedule through the
    onnxruntime path (``use_numpy=False``)."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    train_session = OrtGradientOptimizer(
        onx_loss, inits, max_iter=10,
        learning_rate=LearningRateSGD(learning_rate='optimal'))
    # no state before training
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    train_session.fit(X_train, y_train, use_numpy=False)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='optimal'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
def test_ort_gradient_optimizers_use_numpy_nan_w(self):
    """A huge learning rate (1e3) must make the weighted training
    diverge and raise ``ConvergenceError``."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = LinearRegression()
    reg.fit(X_train, y_train, w_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx, weight_name='weight')
    inits = ['intercept', 'coef']
    # learning rate deliberately too large
    train_session = OrtGradientOptimizer(
        onx_loss, inits, learning_rate=1e3)
    self.assertRaise(
        lambda: train_session.fit(
            X_train, y_train, w_train, use_numpy=True),
        ConvergenceError)
def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
    """Helper (not auto-collected, 'w' prefix): trains a binary
    SGDClassifier through OrtGradientOptimizer, optionally with
    per-sample weights."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    # deterministic toy data, labels = (row index > 10)
    X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
    y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape(
        (-1, 1)) > 10
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    # force both classes to be present
    y[0, 0] = 0
    y[-1, 0] = 1
    w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDClassifier(loss='log')
    reg.fit(X_train, y_train.ravel())
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearClassifier'},
                  options={'zipmap': False})
    onx_loss = add_loss_output(
        onx, 'log', output_index=1,
        weight_name='weight' if use_weight else None)
    inits = ['intercept', 'coef']
    inputs = onx_loss.graph.input
    # one extra graph input when a weight is expected
    self.assertEqual(len(inputs), 3 if use_weight else 2)
    dt = inputs[1].type.tensor_type.elem_type
    self.assertEqual(TensorProto.INT64, dt)  # pylint: disable=E1101
    train_session = OrtGradientOptimizer(
        onx_loss, inits, learning_rate=1e9)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    if use_weight:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          w_train.reshape((-1, 1)), use_numpy=False)
    else:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          use_numpy=False)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    if any(map(numpy.isnan, losses)):
        raise AssertionError(losses)
def test_add_log_loss(self):
    """Compares the ONNX log loss appended by add_loss_output with
    scikit-learn's log_loss on hand-picked probabilities, including
    the 0 and 1 boundary values that require clipping."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    # an Identity model: the 'probabilities' are the raw input
    ide = OnnxIdentity("X", op_version=opset, output_names=['Y'])
    onx = ide.to_onnx(inputs={'X': DoubleTensorType()},
                      outputs={'Y': DoubleTensorType()},
                      target_opset=opset)
    onx_loss = add_loss_output(onx, 'log', eps=1e-6)
    x1 = numpy.array([0, 0, 0.2, 0.5, 0.8, 1, 1])
    # two columns: P(class 0) and P(class 1)
    X = numpy.vstack([1 - x1, x1]).T.astype(numpy.float64)
    y = numpy.array([0, 1, 0, 1, 1, 1, 0], dtype=numpy.int64)
    oinf = OnnxInference(onx_loss)
    output = oinf.run({'X': X, 'label': y.reshape((-1, 1))})
    loss = output['loss']
    skl_loss = log_loss(y, X[:, 1], eps=1e-6)
    self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-5)
def test_ort_gradient_optimizers_use_numpy_pickle(self):
    """An OrtGradientOptimizer must survive pickling both before and
    after training, and keep training afterwards."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    train_session0 = OrtGradientOptimizer(onx_loss, inits)
    # round-trip an untrained optimizer
    st = io.BytesIO()
    pickle.dump(train_session0, st)
    st2 = io.BytesIO(st.getvalue())
    train_session1 = pickle.load(st2)
    train_session1.fit(X_train, y_train, use_numpy=True)
    # round-trip a trained optimizer
    st = io.BytesIO()
    pickle.dump(train_session1, st)
    st2 = io.BytesIO(st.getvalue())
    train_session = pickle.load(st2)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    # and it must still be able to train
    train_session.fit(X_train, y_train, use_numpy=True)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
def wtest_ort_gradient_optimizers_reg(self, use_weight=False):
    """Helper (not auto-collected, 'w' prefix): a diverging regression
    training (learning_rate=1e9) must raise ``ConvergenceError``,
    weighted or not."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
    y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
    # perturb the endpoints so the target is not perfectly linear
    y[0, 0] += 1
    y[-1, 0] += 1
    w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDRegressor()
    reg.fit(X_train, y_train.ravel())
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx_loss = add_loss_output(
        onx, 'squared_error',
        weight_name='weight' if use_weight else None)
    inits = ['intercept', 'coef']
    inputs = onx_loss.graph.input
    self.assertEqual(len(inputs), 3 if use_weight else 2)
    train_session = OrtGradientOptimizer(
        onx_loss, inits, learning_rate=1e9)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    if use_weight:
        self.assertRaise(
            lambda: train_session.fit(X_train, y_train.reshape((-1, 1)),
                                      w_train.reshape((-1, 1)),
                                      use_numpy=False),
            ConvergenceError)
    else:
        self.assertRaise(
            lambda: train_session.fit(
                X_train, y_train.reshape((-1, 1)), use_numpy=False),
            ConvergenceError)
    # even after divergence, partial state and losses are available
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    if any(map(numpy.isnan, losses)):
        raise AssertionError(losses)
def test_add_loss_output_reg_l1(self):
    """Checks add_loss_output with the 'l1' score on a regressor."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx_loss = add_loss_output(onx, 'l1')
    oinf = OnnxInference(onx_loss)
    output = oinf.run({'X': X_test, 'label': y_test.reshape((-1, 1))})
    loss = output['loss']
    # NOTE(review): the reference is the *squared* error although the
    # ONNX loss is 'l1'; both are near zero for a well-fitted linear
    # model, which is presumably why the 1e-2 tolerance holds —
    # confirm whether mean_absolute_error was intended.
    skl_loss = mean_squared_error(reg.predict(X_test), y_test)
    self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-2)
def test_ort_gradient_optimizers_use_numpy_saved(self):
    """Training with ``saved_gradient`` must dump the gradient graph
    to the requested file."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(onx)
    inits = ['intercept', 'coef']
    temp = get_temp_folder(__file__, "temp_OrtGradientOptimizer")
    filename = os.path.join(temp, "saved.onnx")
    train_session = OrtGradientOptimizer(
        onx_loss, inits, learning_rate=1e-3, saved_gradient=filename)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    train_session.fit(X_train, y_train, use_numpy=True)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))
    # the gradient graph must have been written to disk
    self.assertExists(filename)
def test_grad_helper_loss(self):
    """onnx_derivative with DerivativeOptions.Loss must produce a
    graph different from the plain loss graph."""
    temp = get_temp_folder(__file__, "temp_grad_helper_loss")
    grad_file = os.path.join(temp, "grad.onnx")
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    reg = LinearRegression()
    reg.fit(X, y)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx_loss = add_loss_output(onx)
    text1 = onnx_simple_text_plot(onx_loss)
    new_onx = onnx_derivative(onx, options=DerivativeOptions.Loss,
                              label='variable', loss='loss',
                              path_name=grad_file)
    text2 = onnx_simple_text_plot(new_onx)
    # the derivative graph must differ from the forward+loss graph
    self.assertNotEqual(text1, text2)
X_train[:1].astype(numpy.float32), target_opset=15, black_op={'LinearRegressor'}) ############################################### # Choosing a loss # +++++++++++++++ # # The training requires a loss function. By default, it # is the square function but it could be the absolute error or # include regularization. Function # :func:`add_loss_output # <onnxcustom.utils.orttraining_helper.add_loss_output>` # appends the loss function to the ONNX graph. onx_train = add_loss_output(onx) plot_onnxs(onx, onx_train, title=['Linear Regression', 'Linear Regression + Loss with ONNX']) ##################################### # Let's check inference is working. sess = InferenceSession(onx_train.SerializeToString(), providers=['CPUExecutionProvider']) res = sess.run(None, {'X': X_test, 'label': y_test.reshape((-1, 1))}) print(f"onnx loss={res[0][0, 0] / X_test.shape[0]!r}") ##################################### # Weights
def benchmark(N=1000, n_features=100, hidden_layer_sizes="50,10",
              max_iter=1000, learning_rate_init=1e-4, batch_size=100,
              run_skl=True, device='cpu', opset=14):
    """
    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn` for
    training. Training algorithm is SGD.

    :param N: number of observations to train on
    :param n_features: number of features
    :param hidden_layer_sizes: hidden layer sizes, comma separated values
    :param max_iter: number of iterations
    :param learning_rate_init: initial learning rate
    :param batch_size: batch size
    :param run_skl: train scikit-learn in the same condition (True) or
        just walk through one iterator with *scikit-learn*
    :param device: `'cpu'` or `'cuda'`
    :param opset: opset to choose for the conversion
    """
    # normalize parameters: they may arrive as strings from the CLI
    N = int(N)
    n_features = int(n_features)
    max_iter = int(max_iter)
    learning_rate_init = float(learning_rate_init)
    batch_size = int(batch_size)
    run_skl = run_skl in (1, True, '1', 'True')

    print("N=%d" % N)
    print("n_features=%d" % n_features)
    print(f"hidden_layer_sizes={hidden_layer_sizes!r}")
    print("max_iter=%d" % max_iter)
    print(f"learning_rate_init={learning_rate_init:f}")
    print("batch_size=%d" % batch_size)
    print(f"run_skl={run_skl!r}")
    print(f"opset={opset!r}")
    print(f"device={device!r}")
    print('------------------')

    if not isinstance(hidden_layer_sizes, tuple):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
    X, y = make_regression(N, n_features=n_features, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # scikit-learn baseline; a single iteration when run_skl is False
    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      max_iter=max_iter if run_skl else 1,
                      solver='sgd', learning_rate_init=learning_rate_init,
                      n_iter_no_change=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nn.fit(X_train, y_train)
    dur_skl = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))

    # conversion to ONNX
    onx = to_onnx(nn, X_train[:1].astype(numpy.float32),
                  target_opset=opset)

    # add loss
    onx_train = add_loss_output(onx)

    # list of weights
    weights = get_train_initializer(onx)
    print('weights:', list(sorted(weights)))

    # training
    print(f"device={device!r} get_device()={get_device()!r}")

    #######################################
    # The training session.

    train_session = OrtGradientOptimizer(
        onx_train, list(weights), device=device, verbose=0,
        learning_rate=learning_rate_init, warm_start=False,
        max_iter=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    train_session.fit(X, y)
    dur_ort = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
    print("time_ort=%r, last_trained_error=%r" % (
        dur_ort, train_session.train_losses_[-1]))
def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
    """Helper (not auto-collected, 'w' prefix): trains a binary
    classifier, then checks get_trained_onnx against the original
    model and the get_state/set_state round-trip."""
    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer
    X, y = make_classification(  # pylint: disable=W0632
        100, n_features=10, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDClassifier(loss='log')
    reg.fit(X_train, y_train)
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearClassifier'},
                  options={'zipmap': False})
    # keep an untouched copy of the model before training
    onx2 = load_onnx(BytesIO(onx.SerializeToString()))
    set_model_props(onx, {'info': 'unit test'})
    onx_loss = add_loss_output(
        onx, 'log', output_index=1,
        weight_name='weight' if use_weight else None)
    inits = ['intercept', 'coef']
    inputs = onx_loss.graph.input
    self.assertEqual(len(inputs), 3 if use_weight else 2)
    dt = inputs[1].type.tensor_type.elem_type
    self.assertEqual(TensorProto.INT64, dt)  # pylint: disable=E1101
    train_session = OrtGradientOptimizer(
        onx_loss, inits, learning_rate=1e-3)
    self.assertRaise(lambda: train_session.get_state(), AttributeError)
    if use_weight:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          w_train.reshape((-1, 1)), use_numpy=False)
    else:
        train_session.fit(X_train, y_train.reshape((-1, 1)),
                          use_numpy=False)
    state_tensors = train_session.get_state()
    self.assertEqual(len(state_tensors), 2)
    r = repr(train_session)
    self.assertIn("OrtGradientOptimizer(model_onnx=", r)
    self.assertIn("learning_rate='invscaling'", r)
    losses = train_session.train_losses_
    self.assertGreater(len(losses), 1)
    self.assertFalse(any(map(numpy.isnan, losses)))

    # get_trained_weight: trained model must produce outputs of the
    # same structure as the untrained copy
    trained_onnx = train_session.get_trained_onnx(model=onx2)
    sess = InferenceSession(onx2.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    got1 = sess.run(None, {'X': X_train})
    sess = InferenceSession(trained_onnx.SerializeToString(),
                            providers=['CPUExecutionProvider'])
    got2 = sess.run(None, {'X': X_train})
    self.assertEqual(len(got1), len(got2))
    self.assertEqual(got1[0].shape, got2[0].shape)

    # state round-trip
    state = train_session.get_state()
    self.assertIsInstance(state, dict)
    train_session.set_state(state)