def test_get_train_initializer(self):
    from onnxcustom.utils.orttraining_helper import get_train_initializer
    X, y = make_regression(  # pylint: disable=W0632
        100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, _, y_train, __ = train_test_split(X, y)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    reg.coef_ = reg.coef_.reshape((1, -1))
    onx = to_onnx(reg, X_train, target_opset=opset,
                  black_op={'LinearRegressor'})
    inits = get_train_initializer(onx)
    self.assertEqual({'intercept', 'coef'}, set(inits))
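
########################################
# For illustration only, a minimal standalone sketch (not part of the
# test suite) of what ``get_train_initializer`` returns for the same
# kind of model: the names of the initializers a gradient can be
# computed for, here the linear model's coefficients and intercept.
# It assumes numpy, scikit-learn, skl2onnx and onnxcustom are installed
# and that opset 15 is available.

import numpy
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from skl2onnx import to_onnx
from onnxcustom.utils.orttraining_helper import get_train_initializer

X, y = make_regression(100, n_features=10, bias=2)
X = X.astype(numpy.float32)
y = y.astype(numpy.float32)
reg = LinearRegression()
reg.fit(X, y)
reg.coef_ = reg.coef_.reshape((1, -1))
# black_op forces a decomposition into MatMul + Add so that the
# coefficients end up as initializers instead of node attributes.
onx = to_onnx(reg, X, target_opset=15, black_op={'LinearRegressor'})
print(sorted(get_train_initializer(onx)))  # expected: ['coef', 'intercept']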
def benchmark(N=1000, n_features=100, hidden_layer_sizes="50,50",
              max_iter=500, learning_rate_init=1e-8, batch_size=15,
              run_skl=True, device='cpu', opset=14):
    """
    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn`
    for training. Training algorithm is SGD.

    :param N: number of observations to train on
    :param n_features: number of features
    :param hidden_layer_sizes: hidden layer sizes, comma separated values
    :param max_iter: number of iterations
    :param learning_rate_init: initial learning rate
    :param batch_size: batch size
    :param run_skl: train scikit-learn in the same conditions (True) or
        just walk through one iteration with *scikit-learn*
    :param device: `'cpu'` or `'cuda'`
    :param opset: opset to choose for the conversion
    """
    N = int(N)
    n_features = int(n_features)
    max_iter = int(max_iter)
    learning_rate_init = float(learning_rate_init)
    batch_size = int(batch_size)
    run_skl = run_skl in (1, True, '1', 'True')

    print("N=%d" % N)
    print("n_features=%d" % n_features)
    print(f"hidden_layer_sizes={hidden_layer_sizes!r}")
    print("max_iter=%d" % max_iter)
    print(f"learning_rate_init={learning_rate_init:f}")
    print("batch_size=%d" % batch_size)
    print(f"run_skl={run_skl!r}")
    print(f"opset={opset!r}")
    print(f"device={device!r}")
    print('------------------')

    if not isinstance(hidden_layer_sizes, tuple):
        hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
    X, y = make_regression(N, n_features=n_features, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      max_iter=max_iter if run_skl else 1,
                      solver='sgd', learning_rate_init=learning_rate_init,
                      n_iter_no_change=max_iter, batch_size=batch_size,
                      alpha=0, nesterovs_momentum=False, momentum=0,
                      learning_rate="invscaling")

    begin = time.perf_counter()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nn.fit(X_train, y_train)
    dur_skl = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))

    # conversion to ONNX
    onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=opset)
    onx = onnx_rename_weights(onx)

    # list of weights
    weights = get_train_initializer(onx)
    print('weights:', list(sorted(weights)))

    # training
    print(f"device={device!r} get_device()={get_device()!r}")

    #######################################
    # The training session.

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, list(weights), device=device, verbose=0,
        learning_rate=learning_rate_init, warm_start=False,
        max_iter=max_iter, batch_size=batch_size)

    begin = time.perf_counter()
    train_session.fit(X, y)
    dur_ort = time.perf_counter() - begin

    print("time_skl=%r, mean_squared_error=%r" % (
        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
    print("time_ort=%r, last_trained_error=%r" % (
        dur_ort, train_session.train_losses_[-1]))
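
#######################################
# A possible entry point for the benchmark above. The actual script may
# wire ``benchmark`` to a command-line parser instead; this is only a
# sketch that runs a small configuration directly so the comparison
# finishes quickly.

if __name__ == "__main__":
    benchmark(N=1000, n_features=100, hidden_layer_sizes="50,50",
              max_iter=100, learning_rate_init=1e-8, batch_size=15,
              run_skl=True, device='cpu', opset=14)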
sess = InferenceSession(onx_train.SerializeToString(),
                        providers=['CPUExecutionProvider'])
res = sess.run(None, {'X': X_test, 'label': y_test.reshape((-1, 1))})
print(f"onnx loss={res[0][0, 0] / X_test.shape[0]!r}")

#####################################
# Weights
# +++++++
#
# Every initializer is a set of weights which can be trained
# and a gradient will be computed for it.
# However an initializer used to modify a shape or to
# extract a subpart of a tensor does not need training.
# Let's remove them from the list of initializers to train.

inits = get_train_initializer(onx)
weights = {k: v for k, v in inits.items() if k != "shape_tensor"}
pprint(list((k, v[0].shape) for k, v in weights.items()))

#####################################
# Train on CPU or GPU if available
# ++++++++++++++++++++++++++++++++

device = "cuda" if get_device().upper() == 'GPU' else 'cpu'
print(f"device={device!r} get_device()={get_device()!r}")

#######################################
# Stochastic Gradient Descent
# +++++++++++++++++++++++++++
#
# The training logic is hidden in class
print(nn.loss_curve_)

#################################
# Score:

print(f"mean_squared_error={mean_squared_error(y_test, nn.predict(X_test))!r}")

#######################################
# Conversion to ONNX
# ++++++++++++++++++

onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15)
plot_onnxs(onx)

weights = list(sorted(get_train_initializer(onx)))
print(weights)

#######################################
# Training graph with forward backward
# ++++++++++++++++++++++++++++++++++++
#

device = "cuda" if get_device().upper() == 'GPU' else 'cpu'
print(f"device={device!r} get_device()={get_device()!r}")

onx = onnx_rename_weights(onx)
train_session = OrtGradientForwardBackwardOptimizer(
    onx, device=device, verbose=1,