def test_ivfsq_pred(qtype, encodeResidual, nrows, ncols, n_neighbors, nlist):
    """Fit an IVF-SQ index on blob data and check neighbor-vote labels.

    Builds an ``ivfsq`` index, queries the training set for neighbors, and
    asserts that majority-vote prediction recovers the blob labels exactly.
    """
    algo_params = {
        'nlist': nlist,
        # nprobe is a count of inverted lists to probe, so it must be an
        # integer; the previous ``nlist * 0.25`` produced a float (and would
        # round to zero probes for small nlist). Probe at least one list.
        'nprobe': max(1, int(nlist * 0.25)),
        'qtype': qtype,
        'encodeResidual': encodeResidual
    }

    X, y = make_blobs(n_samples=nrows, centers=5,
                      n_features=ncols, random_state=0)

    logger.set_level(logger.level_debug)
    knn_cu = cuKNN(algorithm="ivfsq", algo_params=algo_params)
    knn_cu.fit(X)
    neigh_ind = knn_cu.kneighbors(X, n_neighbors=n_neighbors,
                                  return_distance=False)

    # Free the index before predicting to release GPU memory early.
    del knn_cu
    gc.collect()

    # Only the voted labels are checked; the probabilities are unused.
    labels, _ = predict(neigh_ind, y, n_neighbors)

    assert array_equal(labels, y)
def test_redirected_logger():
    """Each logger level should write its message to a redirected stdout."""
    new_stdout = StringIO()

    with logger.set_level(logger.level_trace):
        # We do not test trace because CUML_LOG_TRACE is not compiled by
        # default
        cases = [
            ("This is a debug message", logger.debug),
            ("This is an info message", logger.info),
            ("This is a warn message", logger.warn),
            ("This is an error message", logger.error),
            ("This is a critical message", logger.critical),
        ]
        for test_msg, log_fn in cases:
            with redirect_stdout(new_stdout):
                log_fn(test_msg)
            assert test_msg in new_stdout.getvalue()

        # Check that logging does not error with sys.stdout of None
        with redirect_stdout(None):
            test_msg = "This is a debug message"
            logger.debug(test_msg)
def test_nearest_neighbors_sparse(shape, metric, n_neighbors,
                                  batch_size_index, batch_size_query):
    """Sparse brute-force KNN should agree with scikit-learn's results."""
    nrows, ncols, density = shape

    # A single sample cannot supply more than one neighbor.
    if nrows == 1 and n_neighbors > 1:
        return

    index_mat = cp.sparse.random(nrows, ncols, format='csr',
                                 density=density, random_state=35)
    query_mat = cp.sparse.random(nrows, ncols, format='csr',
                                 density=density, random_state=38)

    if metric == 'jaccard':
        # Jaccard is only defined on binary data.
        index_mat = index_mat.astype('bool').astype('float32')
        query_mat = query_mat.astype('bool').astype('float32')

    logger.set_level(logger.level_debug)
    cu_model = cuKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
                     algorithm="brute", output_type="numpy",
                     verbose=logger.level_debug,
                     algo_params={"batch_size_index": batch_size_index,
                                  "batch_size_query": batch_size_query})
    cu_model.fit(index_mat)
    cuD, cuI = cu_model.kneighbors(query_mat)

    # Densify when sklearn's brute-force path can't handle the metric on
    # sparse input.
    if metric not in sklearn.neighbors.VALID_METRICS_SPARSE['brute']:
        index_mat = index_mat.todense()
        query_mat = query_mat.todense()

    sk_model = skKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
                     algorithm="brute", n_jobs=-1)
    sk_X = index_mat.get()
    sk_model.fit(sk_X)
    skD, skI = sk_model.kneighbors(query_mat.get())

    cp.testing.assert_allclose(cuD, skD, atol=1e-3, rtol=1e-3)

    # Jaccard & Chebyshev have a high potential for mismatched indices
    # due to duplicate distances. We can ignore the indices in this case.
    if metric not in ['jaccard', 'chebyshev']:
        cp.testing.assert_allclose(cuI, skI, atol=1e-4, rtol=1e-4)
def test_logger():
    """Smoke-test every logging level and the level/pattern context managers."""
    logger.trace("This is a trace message")
    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warn("This is a warn message")
    # Fixed message typo: "a error" -> "an error".
    logger.error("This is an error message")
    logger.critical("This is a critical message")

    with logger.set_level(logger.level_warn):
        # At warn level, warn is emitted but info is filtered out.
        assert logger.should_log_for(logger.level_warn)
        assert not logger.should_log_for(logger.level_info)

    with logger.set_pattern("%v"):
        logger.info("This is an info message")
def test_nearest_neighbors_sparse(nrows, ncols, density, metric, n_neighbors,
                                  batch_size_index, batch_size_query):
    """Batched sparse brute-force KNN should match scikit-learn on CSR data."""
    # One sample cannot yield more than one neighbor.
    if nrows == 1 and n_neighbors > 1:
        return

    X = cp.sparse.random(nrows, ncols, format='csr', density=density,
                         random_state=32)

    logger.set_level(logger.level_info)
    cu_model = cuKNN(metric=metric, n_neighbors=n_neighbors,
                     algorithm="brute", verbose=logger.level_debug,
                     algo_params={"batch_size_index": batch_size_index,
                                  "batch_size_query": batch_size_query})
    cu_model.fit(X)
    cu_dists, cu_inds = cu_model.kneighbors(X)

    # Move the data to host once and reuse it for fit and query.
    X_host = X.get()
    sk_model = skKNN(metric=metric, n_neighbors=n_neighbors,
                     algorithm="brute", n_jobs=-1)
    sk_model.fit(X_host)
    sk_dists, sk_inds = sk_model.kneighbors(X_host)

    cp.testing.assert_allclose(cu_inds, sk_inds, atol=1e-4, rtol=1e-4)
    cp.testing.assert_allclose(cu_dists, sk_dists, atol=1e-3, rtol=1e-3)
def test_log_flush():
    """Log output should reach the raw byte buffer only after flush()."""
    stdout_buffer = BytesIO()
    new_stdout = TextIOWrapper(stdout_buffer)

    with logger.set_level(logger.level_trace):
        test_msg = "This is a debug message"
        with redirect_stdout(new_stdout):
            logger.debug(test_msg)
            # The message sits in the TextIOWrapper until explicitly flushed.
            assert test_msg not in stdout_buffer.getvalue().decode('utf-8')
            logger.flush()
            assert test_msg in stdout_buffer.getvalue().decode('utf-8')

    # Check that logging flush does not error with sys.stdout of None
    with redirect_stdout(None):
        logger.flush()
def test_nearest_neighbors_sparse(metric, nrows, ncols, density, n_neighbors,
                                  batch_size_index, batch_size_query):
    """Compare batched sparse brute-force KNN against scikit-learn,
    tolerating a small fraction of index mismatches from duplicate
    distances."""
    # A single row cannot supply more than one neighbor.
    if nrows == 1 and n_neighbors > 1:
        return

    a = cupyx.scipy.sparse.random(nrows, ncols, format='csr', density=density,
                                  random_state=35)
    b = cupyx.scipy.sparse.random(nrows, ncols, format='csr', density=density,
                                  random_state=38)

    if metric == 'jaccard':
        # Jaccard is only defined on binary data; round-trip through bool.
        a = a.astype('bool').astype('float32')
        b = b.astype('bool').astype('float32')

    logger.set_level(logger.level_debug)
    nn = cuKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
               algorithm="brute", output_type="numpy",
               verbose=logger.level_debug,
               algo_params={
                   "batch_size_index": batch_size_index,
                   "batch_size_query": batch_size_query
               })
    nn.fit(a)
    cuD, cuI = nn.kneighbors(b)

    # Densify when sklearn's brute-force path does not support this metric
    # on sparse input.
    if metric not in sklearn.neighbors.VALID_METRICS_SPARSE['brute']:
        a = a.todense()
        b = b.todense()

    sknn = skKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
                 algorithm="brute", n_jobs=-1)
    sk_X = a.get()
    sknn.fit(sk_X)
    skD, skI = sknn.kneighbors(b.get())

    # For some reason, this will occasionally fail w/ a single
    # mismatched element in CI. Allowing the single mismatch for now.
    cp.testing.assert_allclose(cuD, skD, atol=1e-5, rtol=1e-5)

    # Jaccard & Chebyshev have a high potential for mismatched indices
    # due to duplicate distances. We can ignore the indices in this case.
    if metric not in ['jaccard', 'chebyshev']:
        # The actual neighbors returned in the presence of duplicate distances
        # is non-deterministic. If we got to this point, the distances all
        # match between cuml and sklearn. We set a reasonable threshold
        # (.5% in this case) to allow differences from non-determinism.
        diffs = abs(cuI - skI)
        assert (len(diffs[diffs > 0]) / len(np.ravel(skI))) <= 0.005
def random_state():
    """Generate a random seed in [0, 10**6] and log it for reproducibility.

    Returns
    -------
    int
        The generated seed.
    """
    # ``random.randint`` requires integer bounds: the previous ``1e6`` float
    # argument is deprecated in Python 3.10 and a TypeError from 3.12 on.
    # Also avoid shadowing this function's own name with the local.
    seed = random.randint(0, 10**6)
    with logger.set_level(logger.level_debug):
        logger.debug("Random seed: {}".format(seed))
    return seed