def test_get_ort_device_type_exc_2(self):
     """Check the integer code returned by get_ort_device_type and its errors.

     The device built from 'cpu' must map to 0 and the one built from
     'cuda' to 1; an empty string must raise ValueError and an integer
     must raise TypeError.
     """
     # Valid devices, checked in the same order as before: cpu, then cuda.
     for device_name, expected_code in (('cpu', 0), ('cuda', 1)):
         dev = get_ort_device(device_name)
         self.assertEqual(get_ort_device_type(dev), expected_code)
     # Invalid arguments. assertRaise(callable, exception) is not part of
     # unittest.TestCase -- presumably supplied by the test base class.
     for bad_value, error_type in (('', ValueError), (0, TypeError)):
         self.assertRaise(
             lambda value=bad_value: get_ort_device_type(value), error_type)
示例#2
0
 def __init__(self,
              model_onnx,
              weights_to_train,
              loss_output_name='loss',
              max_iter=100,
              training_optimizer_name='SGDOptimizer',
              batch_size=10,
              eta0=0.01,
              alpha=0.0001,
              power_t=0.25,
              learning_rate='invscaling',
              device='cpu',
              verbose=0):
     """Store the training configuration on the instance.

     Every argument is kept as an attribute of the same name. Two of
     them are transformed first: *learning_rate* is lower-cased and
     *device* is converted with ``get_ort_device``.

     The hyper-parameter names (``eta0``, ``alpha``, ``power_t``,
     ``learning_rate``, ``max_iter``) follow scikit-learn's
     ``SGDRegressor``:
     https://scikit-learn.org/stable/modules/generated/
     sklearn.linear_model.SGDRegressor.html
     NOTE(review): their exact semantics are not visible in this
     method -- confirm against the code that consumes them.
     """
     # What is trained.
     self.model_onnx = model_onnx
     self.weights_to_train = weights_to_train
     self.loss_output_name = loss_output_name
     self.training_optimizer_name = training_optimizer_name

     # How it is trained.
     self.max_iter = max_iter
     self.batch_size = batch_size
     self.eta0 = eta0
     self.alpha = alpha
     self.power_t = power_t
     self.verbose = verbose

     # The two values that are transformed (and may raise) come last,
     # in their original relative order.
     self.learning_rate = learning_rate.lower()
     self.device = get_ort_device(device)
示例#3
0
 def __init__(self, X, y, batch_size=20, device='cpu'):
     """Keep contiguous copies/views of *X* and *y* plus batching settings.

     :param X: array whose first dimension is the number of rows
     :param y: array with the same number of rows as *X*; when it has a
         single dimension it is reshaped to one column ``(-1, 1)``
     :param batch_size: stored unchanged in ``self.batch_size``
     :param device: converted with ``get_ort_device`` and stored in
         ``self.device``
     :raises ValueError: if *X* and *y* do not have the same number
         of rows
     """
     # A flat target becomes a single-column 2D array.
     y = y.reshape((-1, 1)) if len(y.shape) == 1 else y

     same_row_count = X.shape[0] == y.shape[0]
     if not same_row_count:
         raise ValueError(
             f"Shape mismatch X.shape={X.shape!r}, y.shape={y.shape!r}.")

     self.X = numpy.ascontiguousarray(X)
     self.y = numpy.ascontiguousarray(y)
     self.batch_size = batch_size
     self.device = get_ort_device(device)
 def test_ort_device_to_string(self):
     """Round-trip device names through get_ort_device and back to text.

     Each case is either a string, which must come back unchanged, or
     a pair ``(input, expected)``.
     """
     cases = [
         'cpu', 'cuda', ('gpu', 'cuda'), ('gpu:0', 'cuda'),
         ('cuda:0', 'cuda'), ('gpu:1', 'cuda:1'), 'cuda:1'
     ]
     for value in cases:
         with self.subTest(device=value):
             # A bare string is its own expected result.
             requested, expected = (
                 (value, value) if isinstance(value, str) else value)
             round_trip = ort_device_to_string(get_ort_device(requested))
             self.assertEqual(expected, round_trip)
示例#5
0
    def test_print_ortvalue(self):
        """Check the text produced by str_ortvalue for two CPU OrtValues.

        A small float32 matrix must be printed in full; a 100-element
        int64 vector must be abbreviated with ``'...'`` in the middle.
        """
        cpu = get_ort_device('cpu')

        # Small float32 matrix: every value is listed.
        small = numpy.array([[0, 1, 4, 4.5]], dtype=numpy.float32)
        small_expected = ("device=Cpu dtype=dtype('float32') shape=(1, 4) "
                          "value=[0.0, 1.0, 4.0, 4.5]")
        small_ort = C_OrtValue.ortvalue_from_numpy(small, cpu)
        # The conversion is checked twice in a row, as in the original test.
        for _ in range(2):
            self.assertEqual(small_expected, str_ortvalue(small_ort))

        # Long int64 vector: only the head and the tail are listed.
        long_vector = numpy.arange(100).astype(numpy.int64)
        long_expected = ("device=Cpu dtype=dtype('int64') shape=(100,) "
                         "value=[0, 1, 2, 3, 4, '...', 95, 96, 97, 98, 99]")
        long_ort = C_OrtValue.ortvalue_from_numpy(long_vector, cpu)
        self.assertEqual(long_expected, str_ortvalue(long_ort))
示例#6
0

# Create the training session from ``onx_train``. The list
# ['coefs', 'intercept'] is presumably the names of the weights to train
# -- confirm against create_training_session.
train_session = create_training_session(onx_train, ['coefs', 'intercept'],
                                        device=device)
print(train_session)

##########################################
# The coefficients.

# Current state of the session, as returned by get_state().
state_tensors = train_session.get_state()
pprint(state_tensors)

######################################
# We can now check the coefficients are updated after one iteration.

# Wrap the first training row, its label reshaped to a single column, and
# a one-element float32 array holding 0.01 as OrtValues created for ``dev``.
dev = get_ort_device(device)
ortx = C_OrtValue.ortvalue_from_numpy(X_train[:1], dev)
orty = C_OrtValue.ortvalue_from_numpy(y_train[:1].reshape((-1, 1)), dev)
ortlr = C_OrtValue.ortvalue_from_numpy(
    numpy.array([0.01], dtype=numpy.float32), dev)

# Bind the three inputs by name and request the 'loss' output on ``dev``.
bind = train_session.io_binding()._iobinding
bind.bind_ortvalue_input('X', ortx)
bind.bind_ortvalue_input('label', orty)
bind.bind_ortvalue_input('Learning_Rate', ortlr)
bind.bind_output('loss', dev)
# Run the session through the binding, then fetch the outputs with
# copy_outputs_to_cpu() so they can be printed.
train_session._sess.run_with_iobinding(bind, None)
outputs = bind.copy_outputs_to_cpu()
pprint(outputs)

##########################################
示例#7
0
def build_ort_op(op_version=14, save=None, **kwargs):  # opset=13, 14, ...
    """
    Build the ONNX graph ``Y = Slice(Slice(X) + 1) * 2`` and its runners.

    The same slice is applied twice. The function also checks on a
    random 10x10 float32 matrix that onnxruntime (CPU) and the numpy
    equivalent agree.

    :param op_version: opset used for every node and as target opset
    :param save: if not None, path where the serialized model is written
    :param kwargs: must contain ``slices``, a pair with one entry per
        axis (0 and 1); each entry is either None (axis not sliced) or a
        sequence usable as ``slice(*entry)`` whose items 0 and 1 are the
        start and the end
    :return: tuple ``(onx, ort_fct, npy_fct, gpu_fct)``: the ONNX model,
        a function running it with onnxruntime on CPU, the numpy
        equivalent, and a function running it through an IO binding on
        ``'cuda:0'`` -- or None when ``get_device()`` is not ``'GPU'``
    :raises KeyError: if ``slices`` is missing from *kwargs*
    :raises AssertionError: if onnxruntime and numpy disagree
    """
    slices = kwargs['slices']
    spec_rows, spec_cols = slices
    row_slice = slice(*spec_rows) if spec_rows is not None else slice(0, None)
    col_slice = slice(*spec_cols) if spec_cols is not None else slice(0, None)

    # Only the axes with an explicit specification are given to Slice.
    sliced_axes = [axis for axis in (0, 1) if slices[axis] is not None]
    starts = numpy.array([slices[axis][0] for axis in sliced_axes],
                         dtype=numpy.int64)
    ends = numpy.array([slices[axis][1] for axis in sliced_axes],
                       dtype=numpy.int64)
    axes = numpy.array(sliced_axes, dtype=numpy.int64)

    # Graph: Slice -> Add(1) -> Slice -> Mul(2), output named 'Y'.
    first_cut = OnnxSlice('X', starts, ends, axes, op_version=op_version)
    plus_one = OnnxAdd(first_cut,
                       numpy.array([1], dtype=numpy.float32),
                       op_version=op_version)
    second_cut = OnnxSlice(plus_one, starts, ends, axes,
                           op_version=op_version)
    doubled = OnnxMul(second_cut,
                      numpy.array([2], dtype=numpy.float32),
                      op_version=op_version,
                      output_names=['Y'])
    onx = doubled.to_onnx(inputs=[('X', FloatTensorType([None, None]))],
                          target_opset=op_version)

    sess = InferenceSession(onx.SerializeToString(),
                            providers=["CPUExecutionProvider"])
    if save is not None:
        with open(save, "wb") as f:
            f.write(onx.SerializeToString())

    def npy_fct(x):
        # numpy counterpart of the ONNX graph.
        shifted = x[row_slice, col_slice] + 1
        return (shifted[row_slice, col_slice] * 2).copy()

    def ort_cpu(x):
        return sess.run(None, {'X': x})

    # Sanity check: both implementations must agree on random data.
    rnd = numpy.random.randn(10, 10).astype(numpy.float32)
    expected = npy_fct(rnd)
    got = sess.run(None, {'X': rnd})[0]
    try:
        assert_almost_equal(expected, got)
    except AssertionError as e:
        details = (kwargs, row_slice, col_slice, expected.shape,
                   got.shape, rnd[row_slice, col_slice].shape)
        raise AssertionError("kwargs=%r slice1=%r slice2=%r shapes=%r ? %r "
                             "(x[slice1, slice2].shape)=%r" % details) from e

    if get_device().upper() != 'GPU':
        return onx, ort_cpu, npy_fct, None

    sessg = InferenceSession(onx.SerializeToString(),
                             providers=["CUDAExecutionProvider"])
    gpu_binding = sessg.io_binding()._iobinding
    gpu_device = get_ort_device('cuda:0')

    def run_gpu(x):
        # x must expose shape() and data_ptr(), as C_OrtValue does.
        gpu_binding.bind_input('X', gpu_device, numpy.float32, x.shape(),
                               x.data_ptr())
        gpu_binding.bind_output('Y', gpu_device)
        return sessg._sess.run_with_iobinding(gpu_binding, None)

    return onx, ort_cpu, npy_fct, run_gpu
示例#8
0
def benchmark_op(repeat=10,
                 number=10,
                 name="Slice",
                 shape_slice_fct=None,
                 save=None,
                 opset=14,
                 repeat_profile=1500,
                 verbose=1):
    """
    Benchmark the graph built by ``build_ort_op`` against numpy.

    For a fixed list of dimensions, the function builds the model,
    times the numpy implementation, onnxruntime on CPU and -- when
    ``build_ort_op`` returns a GPU runner -- onnxruntime through an IO
    binding. It then profiles the model built for the last dimension
    and plots the timings.

    :param repeat: forwarded to ``measure_time``
    :param number: forwarded to ``measure_time``
    :param name: operator name, only used in the graph titles
    :param shape_slice_fct: callable ``dim -> (shape, slices)``; required
        despite the None default
    :param save: forwarded to ``build_ort_op``
    :param opset: forwarded to ``build_ort_op`` as ``op_version``
    :param repeat_profile: number of runs executed while profiling
    :param verbose: prints progress messages when true
    :return: tuple ``(dfprof, dfprofgpu, df, rs, ax)``: CPU profiling
        results, GPU profiling results (None without GPU runner), raw
        measures, pivoted measures with ``numpy/<fct>`` ratios, and the
        two matplotlib axes
    """
    if verbose:
        print("[benchmark_op] start repeat=%d number=%d repeat_profile=%d"
              " opset=%d." % (repeat, number, repeat_profile, opset))
    res = []
    for dim in tqdm([
            8, 16, 32, 64, 100, 128, 200, 256, 400, 512, 600, 784, 800, 1000,
            1024, 1200
    ]):
        shape, slices = shape_slice_fct(dim)
        onx, ort_fct, npy_fct, ort_fct_gpu = build_ort_op(save=save,
                                                          op_version=opset,
                                                          slices=slices)

        # Fewer arrays for the larger dimensions.
        n_arrays = 10 if dim >= 512 else 20
        xs = [
            numpy.random.rand(*shape).astype(numpy.float32)
            for _ in range(n_arrays)
        ]
        info = dict(shape=shape)
        ctx = dict(xs=xs, loop_fct=loop_fct)

        # (label, function, inputs) for every implementation to time.
        runs = [('numpy', npy_fct, xs), ('ort', ort_fct, xs)]
        if ort_fct_gpu is not None:
            dev = get_ort_device('cuda:0')
            gpu_xs = [C_OrtValue.ortvalue_from_numpy(x, dev) for x in xs]
            runs.append(('ort_gpu', ort_fct_gpu, gpu_xs))

        for label, fct, arrays in runs:
            # After the last run ctx['xs'] holds the GPU values when a GPU
            # runner exists; the GPU profiling below relies on that.
            ctx['xs'] = arrays
            ctx['fct'] = fct
            obs = measure_time(
                lambda fct=fct, arrays=arrays: loop_fct(fct, arrays),
                div_by_number=True,
                context=ctx,
                repeat=repeat,
                number=number)
            obs['dim'] = dim
            obs['fct'] = label
            obs['shape'] = ",".join(map(str, shape))
            obs['slices'] = str(slices)
            # NOTE(review): update() runs last, so obs['shape'] ends up
            # being the raw ``shape`` object from ``info`` rather than the
            # joined string set above. Kept as is -- confirm it is intended.
            obs.update(info)
            res.append(obs)

    # profiling CPU
    if verbose:
        print("[benchmark_op] done.")
        print("[benchmark_op] profile CPU.")
    so = SessionOptions()
    so.enable_profiling = True
    sess = InferenceSession(onx.SerializeToString(),
                            so,
                            providers=["CPUExecutionProvider"])
    for _ in range(repeat_profile):
        sess.run(
            None,
            {'X': xs[-1]},
        )
    prof = sess.end_profiling()
    with open(prof, "r") as f:
        js = json.load(f)
    dfprof = DataFrame(OnnxWholeSession.process_profiling(js))
    dfprof['shape'] = ",".join(map(str, shape))
    dfprof['slices'] = str(slices)
    if verbose:
        print("[benchmark_op] done.")

    # profiling GPU
    if ort_fct_gpu is not None:
        if verbose:
            print("[benchmark_op] profile GPU.")
        so = SessionOptions()
        so.enable_profiling = True
        sess = InferenceSession(onx.SerializeToString(),
                                so,
                                providers=["CUDAExecutionProvider"])
        io_binding = sess.io_binding()._iobinding
        # The values in ctx['xs'] were created on 'cuda:0', so the pointer
        # handed to bind_input must be declared on that same device, as
        # run_gpu in build_ort_op does.
        device = get_ort_device('cuda:0')

        x = ctx['xs'][-1]
        for _ in range(repeat_profile):
            io_binding.bind_input('X', device, numpy.float32, x.shape(),
                                  x.data_ptr())
            io_binding.bind_output('Y', device)
            sess._sess.run_with_iobinding(io_binding, None)

        prof = sess.end_profiling()
        with open(prof, "r") as f:
            js = json.load(f)
        dfprofgpu = DataFrame(OnnxWholeSession.process_profiling(js))
        dfprofgpu['shape'] = ",".join(map(str, shape))
        dfprofgpu['slices'] = str(slices)
        if verbose:
            print("[benchmark_op] profile done.")
    else:
        dfprofgpu = None

    # Dataframes
    shape_name = str(shape).replace(str(dim), "N")
    df = pandas.DataFrame(res)
    # Keyword arguments: pandas 2.x no longer accepts them positionally.
    piv = df.pivot(index='shape', columns='fct', values='average')

    rs = piv.copy()
    for c in ['numpy', 'ort', 'ort_gpu']:
        if c in rs.columns:
            rs[f"numpy/{c}"] = rs['numpy'] / rs[c]
    # Only "numpy/numpy" contains "/numpy": the constant baseline ratio
    # is dropped.
    rs = rs[[c for c in rs.columns if "/numpy" not in c]].copy()

    # Graphs.
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    piv.plot(logx=True,
             logy=True,
             ax=ax[0],
             title=f"{name} benchmark\n{shape_name!r} lower better")
    ax[0].legend(prop={"size": 9})
    rs.plot(
        logx=True,
        logy=True,
        ax=ax[1],
        title=f"{name} Speedup, baseline=numpy\n{shape_name!r} higher better")
    # Horizontal guides at ratios 0.5 and 2.
    ax[1].plot([min(rs.index), max(rs.index)], [0.5, 0.5], 'g--')
    ax[1].plot([min(rs.index), max(rs.index)], [2., 2.], 'g--')
    ax[1].legend(prop={"size": 9})
    return dfprof, dfprofgpu, df, rs, ax
示例#9
0
#################################
# With onnxruntime.

# Run the model with the CPU provider and keep the first output.
sess = InferenceSession(onx.SerializeToString(),
                        providers=["CPUExecutionProvider"])
y_cpu = sess.run(None, {'X': x})[0]

#######################################
# Execution on GPU
# ++++++++++++++++
#
# If available...

# ``cuda`` records whether the input could be created as an OrtValue for
# 'cuda:0'. A RuntimeError raised while doing so is printed and treated as
# "no GPU".
if get_device().upper() == 'GPU':
    dev = get_ort_device('cuda:0')
    try:
        gx = C_OrtValue.ortvalue_from_numpy(x, dev)
        cuda = True
    except RuntimeError as e:
        print(e)
        cuda = False
else:
    cuda = False

if cuda:
    sessg = InferenceSession(onx.SerializeToString(),
                             providers=["CUDAExecutionProvider"])

    # Bind input 'X' from the shape and data pointer of ``gx``, declared on
    # the same device ``dev`` used to create it.
    io_binding = sessg.io_binding()._iobinding
    io_binding.bind_input('X', dev, numpy.float32, gx.shape(), gx.data_ptr())