Пример #1
0
def get_handle(use_handle, n_streams=0):
    """Build a cuML handle with an attached CUDA stream.

    Returns ``(None, None)`` when *use_handle* is falsy; otherwise a
    ``(handle, stream)`` pair where the stream has been set on the handle.
    """
    if not use_handle:
        return None, None
    handle = cuml.Handle(n_streams)
    stream = cuml.cuda.Stream()
    handle.setStream(stream)
    return handle, stream
Пример #2
0
def test_base_class_usage_with_handle():
    """Check that cuml.Base accepts an externally created handle and syncs."""
    cuda_stream = cuml.cuda.Stream()
    cuml_handle = cuml.Handle()
    cuml_handle.setStream(cuda_stream)
    model = cuml.Base(handle=cuml_handle)
    model.handle.sync()
    del model
Пример #3
0
def test_svm_memleak_on_exception(params, n_rows=1000, n_iter=10,
                                  n_cols=1000, dataset='blobs'):
    """
    Test whether there is any mem leak when we exit training with an exception.
    The poly kernel with degree=30 will overflow, and triggers the
    'SMO error: NaN found...' exception.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``cu_svm.SVC``.
    n_rows, n_cols : int
        Shape of the generated training data.
    n_iter : int
        Number of failing fit attempts performed in the main loop.
    dataset : str
        Unused here; kept for signature parity with sibling tests.
    """
    X_train, y_train = make_blobs(n_samples=n_rows, n_features=n_cols,
                                  random_state=137, centers=2)
    X_train = X_train.astype(np.float32)
    stream = cuml.cuda.Stream()
    handle = cuml.Handle(stream=stream)

    # Warmup. Some modules that are used in SVC allocate space on the device
    # and consume memory. Here we make sure that this allocation is done
    # before the first call to get_memory_info.
    tmp = cu_svm.SVC(handle=handle, **params)
    with pytest.raises(RuntimeError):
        tmp.fit(X_train, y_train)
        # SMO error: NaN found during fitting.

    # Baseline free-memory reading taken after the warmup allocation.
    free_mem = cuda.current_context().get_memory_info()[0]

    # Main test loop: every fit is expected to raise; any device memory the
    # failed fit leaves behind would accumulate across iterations.
    for i in range(n_iter):
        cuSVC = cu_svm.SVC(handle=handle, **params)
        with pytest.raises(RuntimeError):
            cuSVC.fit(X_train, y_train)
            # SMO error: NaN found during fitting.

    # `del` is a statement, not a function — no parentheses.
    del cuSVC
    handle.sync()
    delta_mem = free_mem - cuda.current_context().get_memory_info()[0]
    print("Delta GPU mem: {} bytes".format(delta_mem))
    assert delta_mem == 0
Пример #4
0
def run_classification(datatype, penalty, loss, dims, nclasses):
    """Fit cuML and sklearn LinearSVC on the same data and compare scores.

    Parameters
    ----------
    datatype : dtype
        Element type for the generated dataset.
    penalty : str
        'l1' or 'l2' regularization, forwarded to both estimators.
    loss : str
        Loss name, forwarded to both estimators.
    dims : tuple of int
        (nrows, ncols) of the generated dataset.
    nclasses : int
        Number of target classes.
    """
    t = time.perf_counter()
    nrows, ncols = dims
    X_train, X_test, y_train, y_test = make_classification_dataset(
        datatype, nrows, ncols, nclasses)
    logger.debug(f"Data generation time: {time.perf_counter() - t} s.")

    # solving in primal is not supported by sklearn for this loss type.
    skdual = loss == 'hinge' and penalty == 'l2'
    if loss == 'hinge' and penalty == 'l1':
        pytest.skip(
            "sklearn does not support this combination of loss and penalty")

    # limit the max iterations for sklearn to reduce the max test time;
    # use floor division so max_iter stays an int (sklearn's parameter
    # validation rejects floats for max_iter).
    cuit = 10000
    skit = max(10, min(cuit, cuit * 1000 // nrows))

    t = time.perf_counter()
    handle = cuml.Handle(n_streams=0)
    cum = cu.LinearSVC(handle=handle,
                       loss=loss,
                       penalty=penalty,
                       max_iter=cuit)
    cum.fit(X_train, y_train)
    cus = cum.score(X_test, y_test)
    handle.sync()
    t = time.perf_counter() - t
    logger.debug(f"Cuml time: {t} s.")
    # Give sklearn a proportional (but at least 5 s) time budget.
    t = max(5, t * SKLEARN_TIMEOUT_FACTOR)

    # cleanup cuml objects so that we can more easily fork the process
    # and test sklearn
    del cum
    X_train = X_train.get()
    X_test = X_test.get()
    y_train = y_train.get()
    y_test = y_test.get()
    gc.collect()

    try:

        def run_sklearn():
            # Runs in a separate (forked) context via with_timeout.
            skm = sk.LinearSVC(loss=loss,
                               penalty=penalty,
                               max_iter=skit,
                               dual=skdual)
            skm.fit(X_train, y_train)
            return skm.score(X_test, y_test)

        sks = with_timeout(timeout=t, target=run_sklearn)
        good_enough(cus, sks, nrows)
    except TimeoutError:
        pytest.skip(f"sklearn did not finish within {t} seconds.")
Пример #5
0
def test_svm_memleak(params,
                     n_rows,
                     n_iter,
                     n_cols,
                     use_handle,
                     dataset='blobs'):
    """
    Test whether there is any memory leak.

    .. note:: small `n_rows`, and `n_cols` values will result in small model
        size, that will not be measured by get_memory_info.

    """
    X_train, X_test, y_train, y_test = make_dataset(dataset, n_rows, n_cols)
    stream = cuml.cuda.Stream()
    handle = cuml.Handle()
    handle.setStream(stream)
    # Warmup. Some modules that are used in SVC allocate space on the device
    # and consume memory. Here we make sure that this allocation is done
    # before the first call to get_memory_info.
    tmp = cu_svm.SVC(handle=handle, **params)
    tmp.fit(X_train, y_train)
    ms = get_memsize(tmp)
    # Typo fixed: "consumtion" -> "consumption".
    print("Memory consumption of SVC object is {} MiB".format(
        ms / (1024 * 1024.0)))

    free_mem = cuda.current_context().get_memory_info()[0]

    # Check first whether the get_memory_info gives us the correct memory
    # footprint
    cuSVC = cu_svm.SVC(handle=handle, **params)
    cuSVC.fit(X_train, y_train)
    delta_mem = free_mem - cuda.current_context().get_memory_info()[0]
    assert delta_mem >= ms

    # Main test loop: re-fitting with the same handle must not grow device
    # memory use beyond the single-model footprint measured above.
    b_sum = 0
    for i in range(n_iter):
        cuSVC = cu_svm.SVC(handle=handle, **params)
        cuSVC.fit(X_train, y_train)
        b_sum += cuSVC.intercept_
        cuSVC.predict(X_train)

    # `del` is a statement, not a function — no parentheses.
    del cuSVC
    handle.sync()
    delta_mem = free_mem - cuda.current_context().get_memory_info()[0]
    print("Delta GPU mem: {} bytes".format(delta_mem))
    assert delta_mem == 0
Пример #6
0
def test_base_class_usage_with_handle():
    """Construct cuml.Base around a stream-bound handle and sync it."""
    user_stream = Stream()
    user_handle = cuml.Handle(stream=user_stream)
    instance = cuml.Base(handle=user_handle)
    instance.handle.sync()
    del instance
Пример #7
0
def get_handle(use_handle, n_streams=0):
    """Return a ``(handle, stream)`` pair, or ``(None, None)`` when disabled."""
    if use_handle:
        stream = Stream()
        handle = cuml.Handle(stream=stream, n_streams=n_streams)
        return handle, stream
    return None, None