def wtest_ort_gradient_optimizers_grid_cls(self, use_weight=False):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGD
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    values = [1e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4,
              1e-3, 5e-3, 1e-2, 1e-1, 1, 10, 100, 1000]
    X = numpy.random.randn(30, 3).astype(numpy.float32)
    y = (X.sum(axis=1) >= 0).astype(numpy.int64).reshape((-1, 1))
    X += numpy.random.randn(30, 3).astype(numpy.float32) / 10
    X_train, _, y_train, __ = train_test_split(X, y)
    scorer = make_scorer(
        lambda y_true, y_pred: -log_loss(y_true, y_pred))  # pylint: disable=E1130
    reg = GridSearchCV(SGDClassifier(max_iter=20),
                       param_grid={'eta0': values},
                       scoring=scorer, cv=3)
    reg.fit(X_train, y_train.ravel())
    self.assertIsInstance(reg.best_params_, dict)
    self.assertIn('eta0', reg.best_params_)

    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearClassifier'},
                  options={'zipmap': False})
    onx = select_model_inputs_outputs(onx, outputs=['score'])
    onx = onnx_rename_weights(onx)
    inits = ['I0_coef', 'I1_intercept']

    cvalues = [LearningRateSGD(v) for v in values]
    grid = GridSearchCV(
        OrtGradientForwardBackwardOptimizer(
            onx, inits,
            weight_name='weight' if use_weight else None,
            learning_rate=LearningRateSGD(1e-4),
            learning_loss=NegLogLearningLoss(),
            warm_start=False, max_iter=20, batch_size=10,
            enable_logging=False, exc=False),
        param_grid={'learning_rate': cvalues}, cv=3)
    # No sample weights are built in this test, so the fit call is the
    # same whether or not use_weight is set.
    grid.fit(X_train, y_train)
    self.assertIsInstance(grid.best_params_, dict)
    self.assertEqual(len(grid.best_params_), 1)
    self.assertIsInstance(grid.best_params_['learning_rate'],
                          LearningRateSGD)
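# A minimal numpy sketch of the per-sample negative log-likelihood that
# NegLogLearningLoss is assumed to compute (with an eps clipping term,
# see the eps assertion in the MLP test below). Names and the exact
# clipping scheme are illustrative, not the library's internals.
def neg_log_loss_sketch(proba, y, eps=1e-5):
    # proba: predicted probability of class 1, y: labels in {0, 1}
    p = numpy.clip(proba, eps, 1 - eps)
    return -(y * numpy.log(p) + (1 - y) * numpy.log(1 - p))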
def wtest_ort_gradient_optimizers_grid_reg(self, use_weight=False):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGD
    from onnxcustom.training.sgd_learning_loss import SquareLearningLoss
    values = [1e-6, 1e-5, 5e-5, 8e-5, 1e-4, 2e-4, 5e-4,
              1e-3, 1e-2, 1e-1, 1]
    X = numpy.random.randn(30, 3).astype(numpy.float32)
    y = X.sum(axis=1).reshape((-1, 1))
    y += numpy.random.randn(y.shape[0]).astype(
        numpy.float32).reshape((-1, 1)) / 10
    X_train, _, y_train, __ = train_test_split(X, y)
    scorer = make_scorer(
        lambda y_true, y_pred: -mean_squared_error(y_true, y_pred))  # pylint: disable=E1130
    reg = GridSearchCV(SGDRegressor(max_iter=20),
                       param_grid={'eta0': values},
                       scoring=scorer, cv=3, error_score='raise')
    reg.fit(X_train, y_train.ravel())
    self.assertIsInstance(reg.best_params_, dict)
    self.assertIn('eta0', reg.best_params_)

    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx = onnx_rename_weights(onx)
    inits = ['I0_coef', 'I1_intercept']

    cvalues = [LearningRateSGD(v) for v in values]
    grid = GridSearchCV(
        OrtGradientForwardBackwardOptimizer(
            onx, inits,
            weight_name='weight' if use_weight else None,
            learning_rate=LearningRateSGD(1e-4),
            learning_loss=SquareLearningLoss(),
            warm_start=False, max_iter=20, batch_size=10,
            enable_logging=False, exc=False),
        param_grid={'learning_rate': cvalues}, cv=3)
    # As in the classifier test, no sample weights are built here, so the
    # same fit call covers both values of use_weight.
    grid.fit(X_train, y_train)
    self.assertIsInstance(grid.best_params_, dict)
    self.assertEqual(len(grid.best_params_), 1)
    self.assertIsInstance(grid.best_params_['learning_rate'],
                          LearningRateSGD)
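# A minimal numpy sketch of the squared loss and its gradient that
# SquareLearningLoss is assumed to compute; the exact scaling (sum vs.
# mean, the factor 2) is an assumption, not the library's code.
def square_loss_sketch(pred, y):
    diff = pred - y
    return (diff ** 2).sum(), 2 * diff  # loss, gradient w.r.t. pred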
def wtest_ort_gradient_optimizers_fw_nesterov_binary_mlp(
        self, use_weight=True):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import (
        LearningRateSGDNesterov)
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
    X, y = make_classification(  # pylint: disable=W0632
        100, n_features=10, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.int64)
    w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = MLPClassifier(solver='sgd')
    reg.fit(X_train, y_train)
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'},
                  options={'zipmap': False})
    onx = select_model_inputs_outputs(
        onx, outputs=['out_activations_result'])
    self.assertIn("output: name='out_activations_result'",
                  onnx_simple_text_plot(onx))
    set_model_props(onx, {'info': 'unit test'})
    onx = onnx_rename_weights(onx)
    inits = ['I0_coefficient', 'I1_intercepts',
             'I2_coefficient1', 'I3_intercepts1']
    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, weight_name='weight' if use_weight else None,
        learning_rate=LearningRateSGDNesterov(
            1e-4, nesterov=False, momentum=0.9),
        learning_loss=NegLogLearningLoss(),
        warm_start=False, max_iter=100, batch_size=10)
    self.assertIsInstance(train_session.learning_loss, NegLogLearningLoss)
    self.assertEqual(train_session.learning_loss.eps, 1e-5)
    if use_weight:
        train_session.fit(X_train, y_train, w_train)
    else:
        train_session.fit(X_train, y_train)
    temp = get_temp_folder(
        __file__,
        "temp_ort_gradient_optimizers_fw_nesterov_binary_mlp%d" % use_weight)
    train_session.save_onnx_graph(temp)
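# A minimal numpy sketch of the update rule LearningRateSGDNesterov is
# assumed to apply (classical momentum when nesterov=False, as configured
# above); variable names are illustrative, not the library's internals.
def momentum_update_sketch(w, grad, velocity, lr=1e-4, momentum=0.9,
                           nesterov=False):
    velocity = momentum * velocity - lr * grad
    if nesterov:
        # look-ahead step: apply the momentum term a second time
        return w + momentum * velocity - lr * grad, velocity
    return w + velocity, velocity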
def test_onnx_rename_weights(self):
    N, D_in, D_out, H = 3, 3, 3, 3
    var = [('X', FloatTensorType([N, D_in]))]
    w1 = numpy.random.randn(D_in, H).astype(numpy.float32)
    w2 = numpy.random.randn(H, D_out).astype(numpy.float32)
    opv = 14
    onx_alg = OnnxMatMul(
        OnnxRelu(OnnxMatMul(*var, w1, op_version=opv), op_version=opv),
        w2, op_version=opv, output_names=['Y'])
    onx = onx_alg.to_onnx(
        var, target_opset=opv, outputs=[('Y', FloatTensorType())])
    onx = onnx_rename_weights(onx)
    names = [init.name for init in onx.graph.initializer]
    self.assertEqual(['I0_Ma_MatMulcst', 'I1_Ma_MatMulcst1'], names)
    self.assertEqual(get_onnx_opset(onx), opv)
    self.assertRaise(lambda: get_onnx_opset(onx, "H"), ValueError)
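# A rough sketch of what onnx_rename_weights is assumed to do, consistent
# with the names asserted above: prefix every initializer with 'I<index>_'
# so weights sort in a stable order, and propagate the new names to the
# nodes consuming them. This is an illustration, not the library's
# implementation.
def rename_initializers_sketch(model):
    mapping = {}
    for i, init in enumerate(model.graph.initializer):
        new_name = 'I%d_%s' % (i, init.name)
        mapping[init.name] = new_name
        init.name = new_name
    for node in model.graph.node:
        node.input[:] = [mapping.get(n, n) for n in node.input]
    return model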
def wtest_ort_gradient_optimizers_score_reg(self, use_weight=False):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGD
    from onnxcustom.training.sgd_learning_loss import SquareLearningLoss
    X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
    y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
    y[0, 0] += 1
    y[-1, 0] += 1
    w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
    X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
    reg = SGDRegressor(max_iter=20)
    reg.fit(X_train, y_train.ravel())
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    onx = onnx_rename_weights(onx)
    inits = ['I0_coef', 'I1_intercept']
    model = OrtGradientForwardBackwardOptimizer(
        onx, inits, weight_name='weight' if use_weight else None,
        learning_rate=LearningRateSGD(1e-4),
        learning_loss=SquareLearningLoss(),
        warm_start=False, max_iter=20, batch_size=10)
    if use_weight:
        model.fit(X_train, y_train, w_train)
        losses = model.losses(X_train, y_train, w_train)
        score = model.score(X_train, y_train, w_train)
    else:
        model.fit(X_train, y_train)
        losses = model.losses(X_train, y_train)
        score = model.score(X_train, y_train)
    self.assertEqual(losses.shape[0], y_train.shape[0])
    self.assertFalse(any(map(numpy.isnan, losses)))
    self.assertIsInstance(score, numbers.Number)
    params = model.get_params()
    self.assertIsInstance(params['device'], str)
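# A small numpy sketch of how a scalar score can be derived from the
# per-sample losses checked above. Whether score() is exactly the
# (weighted) mean loss is an assumption about the library, not a fact
# taken from it.
def score_from_losses_sketch(losses, weights=None):
    if weights is None:
        return float(numpy.mean(losses))
    return float(numpy.average(losses, weights=weights))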
def benchmark(N=1000, n_features=100, hidden_layer_sizes="50,50",
              max_iter=500, learning_rate_init=1e-8, batch_size=15,
              run_skl=True, device='cpu', opset=14):
    """
    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn`
    for training. The training algorithm is SGD.

    :param N: number of observations to train on
    :param n_features: number of features
    :param hidden_layer_sizes: hidden layer sizes, comma separated values
    :param max_iter: number of iterations
    :param learning_rate_init: initial learning rate
    :param batch_size: batch size
    :param run_skl: train scikit-learn in the same conditions (True) or
        just walk through one iteration with *scikit-learn*
    :param device: `'cpu'` or `'cuda'`
    :param opset: opset to choose for the conversion
    """
    N = int(N)
    n_features = int(n_features)
    max_iter = int(max_iter)
    learning_rate_init = float(learning_rate_init)
    batch_size = int(batch_size)
    run_skl = run_skl in (1, True, '1', 'True')

    print(f"N={N}")
    print(f"n_features={n_features}")
    print(f"hidden_layer_sizes={hidden_layer_sizes!r}")
    print(f"max_iter={max_iter}")
    print(f"learning_rate_init={learning_rate_init:f}")
    print(f"batch_size={batch_size}")
    print(f"run_skl={run_skl!r}")
    print(f"opset={opset!r}")
    print(f"device={device!r}")
    print('------------------')

    if not isinstance(hidden_layer_sizes, tuple):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
    X, y = make_regression(N, n_features=n_features, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      max_iter=max_iter if run_skl else 1,
                      solver='sgd', learning_rate_init=learning_rate_init,
                      n_iter_no_change=max_iter, batch_size=batch_size,
                      alpha=0, nesterovs_momentum=False, momentum=0,
                      learning_rate="invscaling")

    begin = time.perf_counter()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nn.fit(X_train, y_train)
    dur_skl = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))

    # conversion to ONNX
    onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=opset)
    onx = onnx_rename_weights(onx)

    # list of weights
    weights = get_train_initializer(onx)
    print('weights:', list(sorted(weights)))

    # training
    print(f"device={device!r} get_device()={get_device()!r}")

    #######################################
    # The training session.

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, list(weights), device=device, verbose=0,
        learning_rate=learning_rate_init, warm_start=False,
        max_iter=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    # train on the same split as scikit-learn so the timings are comparable
    train_session.fit(X_train, y_train)
    dur_ort = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
    print("time_ort=%r, last_trained_error=%r" % (
        dur_ort, train_session.train_losses_[-1]))
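# Hypothetical invocation of the benchmark above; the parameter values are
# illustrative, not measurements or defaults taken from the repository.
if __name__ == "__main__":
    benchmark(N=1000, n_features=100, hidden_layer_sizes="50,50",
              max_iter=100, learning_rate_init=1e-8, batch_size=15,
              run_skl=True, device='cpu', opset=14)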
nn = MLPRegressor(hidden_layer_sizes=(50, 10), max_iter=max_iter,
                  solver='sgd', learning_rate_init=5e-4, alpha=0,
                  n_iter_no_change=max_iter * 3, batch_size=batch_size,
                  nesterovs_momentum=False, momentum=0,
                  learning_rate="invscaling")

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    nn.fit(X_train, y_train)

########################################
# Conversion to ONNX and trainer initialization

onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15)
onx = onnx_rename_weights(onx)

train_session = OrtGradientForwardBackwardOptimizer(
    onx, device='cpu', learning_rate=1e-5,
    warm_start=False, max_iter=max_iter, batch_size=batch_size)

benches = [benchmark(X_train, y_train, nn, train_session, name='NN-CPU')]

######################################
# Profiling
# +++++++++

def clean_name(text):
    # Keep only the tail of a file path, starting at a known package name.
    # The original snippet stops after the first lookup; the remaining
    # branches below are a plausible completion, not the original code.
    pos = text.find('onnxruntime')
    if pos >= 0:
        return text[pos:]
    pos = text.find('onnxcustom')
    if pos >= 0:
        return text[pos:]
    return text
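# Quick check of the helper above on a hypothetical profiling path.
print(clean_name("/usr/lib/python3.10/site-packages/onnxruntime/capi/a.py"))
# -> 'onnxruntime/capi/a.py'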
def test_gradient_mlpregressor(self):
    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    X = numpy.arange(30).reshape((-1, 3)).astype(numpy.float32) / 100
    y = numpy.arange(X.shape[0]).astype(numpy.float32)
    y = y.reshape((-1, 1))
    reg = MLPRegressor(hidden_layer_sizes=(5,), max_iter=2,
                       activation='logistic', momentum=0,
                       nesterovs_momentum=False, alpha=0)
    reg.fit(X, y.ravel())

    onx = to_onnx(reg, X, target_opset=opset)
    onx = onnx_rename_weights(onx)
    inits = ['I0_coefficient', 'I1_intercepts',
             'I2_coefficient1', 'I3_intercepts1']

    xp = numpy.arange(2 * X.shape[1]).reshape((2, -1)).astype(
        numpy.float32) / 10
    yp = numpy.array([0.5, -0.5], dtype=numpy.float32).reshape((-1, 1))

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, inits, learning_rate=1e-5,
        warm_start=True, max_iter=2, batch_size=10)
    train_session.fit(X, y)
    state = train_session.get_state()
    state_np = [st.numpy() for st in state]

    # gradient with scikit-learn
    coef_grads = state_np[::2]
    intercept_grads = state_np[1::2]
    layer_units = [3, 5, 1]
    activations = [xp] + [None] * (len(layer_units) - 1)
    deltas = [None] * (len(activations) - 1)

    skl_pred = reg.predict(xp)

    batch_loss, coef_grads, intercept_grads = reg._backprop(  # pylint: disable=W0212
        xp, yp, activations, deltas, coef_grads, intercept_grads)
    deltas = activations[-1] - yp

    # gradient with onnxcustom
    ort_xp = C_OrtValue.ortvalue_from_numpy(xp, train_session.device)
    ort_yp = C_OrtValue.ortvalue_from_numpy(yp, train_session.device)
    ort_state = [ort_xp] + state
    prediction = train_session.train_function_.forward(
        ort_state, training=True)

    ort_pred = prediction[0].numpy()
    self.assertEqualArray(skl_pred.ravel(), ort_pred.ravel(), decimal=2)

    loss, loss_gradient = train_session.learning_loss.loss_gradient(
        train_session.device, ort_yp, prediction[0])

    gradient = train_session.train_function_.backward([loss_gradient])

    # comparison: the ONNX-side loss is a sum over the batch while
    # scikit-learn's batch loss is a mean, hence the division by the
    # batch size
    self.assertEqualArray(
        batch_loss, loss.numpy() / xp.shape[0], decimal=3)
    self.assertEqualArray(deltas, loss_gradient.numpy(), decimal=3)

    # do not use an iterator over gradient, it may crash
    ort_grad = [gradient[i].numpy() / xp.shape[0]
                for i in range(len(gradient))][1:]
    self.assertEqualArray(
        intercept_grads[1], ort_grad[3].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[1], ort_grad[2], decimal=2)
    self.assertEqualArray(
        intercept_grads[0], ort_grad[1].ravel(), decimal=2)
    self.assertEqualArray(coef_grads[0], ort_grad[0], decimal=2)
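# A tiny numpy illustration of the sum-vs-mean scaling checked above
# (assumed convention: the ONNX-side squared loss sums over the batch,
# scikit-learn's batch loss averages over it; any common constant factor
# cancels out of the comparison).
pred = numpy.array([[0.2], [-0.1]], dtype=numpy.float32)
target = numpy.array([[0.5], [-0.5]], dtype=numpy.float32)
sum_loss = ((pred - target) ** 2).sum()
mean_loss = ((pred - target) ** 2).mean()
assert abs(sum_loss / pred.shape[0] - mean_loss) < 1e-6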