def test_regressor(loop):  # noqa
    """Check that the distributed regressor predicts like plain LightGBM.

    A ``dlgbm.LGBMRegressor`` is fitted on dask arrays built from the ``X``
    and ``y`` objects defined outside this function, a ``lightgbm``
    regressor is fitted on ``X`` and ``y`` directly, and the two sets of
    predictions on the same inputs are compared with ``assert_eq``.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]), \
            Client(scheduler['address'], loop=loop):
        # Wrap the in-memory data as dask arrays (second argument: 5).
        dask_X = da.from_array(X, 5)
        dask_y = da.from_array(y, 5)

        distributed_model = dlgbm.LGBMRegressor(local_listen_port=16400)
        distributed_model.fit(dask_X, dask_y)
        distributed_pred = distributed_model.predict(dask_X)

        reference_model = lightgbm.LGBMRegressor()
        reference_model.fit(X, y)

        assert_eq(distributed_pred, reference_model.predict(X))
def test_regressor_local_predict(client, listen_port):  # noqa
    """Check that ``to_local()`` yields a model equivalent to the dask one.

    The regressor is fitted on the dask collections, then predictions made
    through the dask estimator are compared with predictions made by the
    model returned from ``to_local()`` on the in-memory data.  The R2 score
    of the dask predictions is compared with the local model's ``score``.

    Parameters
    ----------
    client
        Fixture value forwarded to ``fit`` as ``client=``.
    listen_port
        Fixture value forwarded to the estimator as ``local_listen_port``.
    """
    # The in-memory sample weights are not needed by this test.
    X, y, _, dX, dy, dw = _create_data('regression', output='array')

    dask_model = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42)
    dask_model = dask_model.fit(dX, dy, sample_weight=dw, client=client)
    local_model = dask_model.to_local()

    dask_pred = dask_model.predict(dX)
    local_pred = local_model.predict(X)

    # Score against the still-lazy predictions before materialising them.
    dask_score = r2_score(dy, dask_pred)
    dask_pred = dask_pred.compute()
    local_score = local_model.score(X, y)
    print(dask_score)

    # Predictions and scores should be the same.
    assert_eq(dask_pred, local_pred)
    # The comparison result used to be discarded, so the scores were never
    # actually checked; assert it.
    assert np.isclose(dask_score, local_score), (
        "R2 mismatch: dask=%r local=%r" % (dask_score, local_score)
    )
def test_regressor_local_predict(loop, listen_port):
    """Check that ``to_local()`` yields a model equivalent to the dask one.

    Runs inside a ``cluster()`` / ``Client`` pair created for the test.  The
    regressor is fitted on the dask collections, then predictions made
    through the dask estimator are compared with predictions made by the
    model returned from ``to_local()`` on the in-memory data.  The R2 score
    of the dask predictions is compared with the local model's ``score``.

    Parameters
    ----------
    loop
        Fixture value forwarded to ``Client`` as ``loop=``.
    listen_port
        Fixture value forwarded to the estimator as ``local_listen_port``.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]):
        with Client(scheduler['address'], loop=loop):
            # The in-memory sample weights are not needed by this test.
            X, y, _, dX, dy, dw = _create_data('regression', output="array")

            dask_model = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42)
            dask_model = dask_model.fit(dX, dy, sample_weight=dw)
            local_model = dask_model.to_local()

            dask_pred = dask_model.predict(dX)
            local_pred = local_model.predict(X)

            # Score against the still-lazy predictions before materialising.
            dask_score = r2_score(dy, dask_pred)
            dask_pred = dask_pred.compute()
            local_score = local_model.score(X, y)
            print(dask_score)

            # Predictions and scores should be the same.
            assert_eq(dask_pred, local_pred)
            # The comparison result used to be discarded, so the scores
            # were never actually checked; assert it.
            assert np.isclose(dask_score, local_score), (
                "R2 mismatch: dask=%r local=%r" % (dask_score, local_score)
            )
def test_regressor_quantile(loop, output, listen_port, alpha):
    """Check quantile regression for the dask and the plain estimator.

    Both a ``dlgbm.LGBMRegressor`` and a ``lightgbm.LGBMRegressor`` are
    fitted with ``objective='quantile'`` and the given ``alpha``.  For each
    model the fraction of targets lying strictly below the prediction is
    computed and required to be within ``0.1`` of ``alpha``.

    Parameters
    ----------
    loop
        Fixture value forwarded to ``Client`` as ``loop=``.
    output
        Forwarded to ``_create_data`` as ``output=``.
    listen_port
        Fixture value forwarded to the estimator as ``local_listen_port``.
    alpha
        Target quantile passed to both estimators.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]):
        with Client(scheduler['address'], loop=loop) as client:
            X, y, w, dX, dy, dw = _create_data('regression', output=output)

            dask_model = dlgbm.LGBMRegressor(
                local_listen_port=listen_port,
                seed=42,
                objective='quantile',
                alpha=alpha,
            )
            dask_model = dask_model.fit(dX, dy, client=client, sample_weight=dw)
            dask_pred = dask_model.predict(dX, client=client).compute()
            # Empirical quantile: share of targets below the prediction.
            dask_quantile = np.count_nonzero(y < dask_pred) / y.shape[0]

            local_model = lightgbm.LGBMRegressor(
                seed=42, objective='quantile', alpha=alpha
            )
            local_model.fit(X, y, sample_weight=w)
            local_pred = local_model.predict(X)
            local_quantile = np.count_nonzero(y < local_pred) / y.shape[0]

            # Quantiles should be right.  These comparisons used to be
            # evaluated and thrown away, so the test could never fail on
            # them; assert the results instead.
            # NOTE(review): tolerance kept at the original 0.1 -- confirm it
            # is wide enough for every parametrised alpha/output.
            assert np.isclose(dask_quantile, alpha, atol=.1), (
                "dask quantile %r not within 0.1 of alpha %r"
                % (dask_quantile, alpha)
            )
            assert np.isclose(local_quantile, alpha, atol=.1), (
                "local quantile %r not within 0.1 of alpha %r"
                % (local_quantile, alpha)
            )
def test_regress_newsread(client, listen_port):
    """Fit the dask regressor on the gzipped CSV files and check its R2.

    The files matching ``./system_tests/data/*.gz`` are loaded as a dask
    dataframe; the first column is used as the target and the remaining
    columns as features.  The test passes when the R2 of the predictions
    on the training data exceeds 0.8.
    """
    frame = dd.read_csv(
        "./system_tests/data/*.gz",
        compression="gzip",
        blocksize=None,
    )
    dy = frame.iloc[:, 0]
    dX = frame.iloc[:, 1:]

    model = dlgbm.LGBMRegressor(n_estimators=50, local_listen_port=listen_port)
    model.fit(dX, dy)
    predicted = model.predict(dX, client=client)

    # dask_ml.metrics.r2_score fails with dataframes, so R2 is derived by
    # hand: 1 - (residual sum of squares / total sum of squares).
    residual_ss = ((dy - predicted) ** 2).sum()
    total_ss = ((dy - dy.mean()) ** 2).sum()
    r2_value = (1 - residual_ss / total_ss).compute()
    print(r2_value)

    assert r2_value > 0.8
def test_regressor(output, client, listen_port):  # noqa
    """Compare the dask regressor against plain LightGBM on the same data.

    Both estimators are fitted with sample weights and ``seed=42``.  Unless
    ``output`` is ``'dataframe'``, the R2 of the dask predictions must match
    the local model's ``score`` to within ``atol=.01``.  In every case the
    targets are compared with each model's predictions using loose
    tolerances (``rtol=1.``, ``atol=50.``).
    """
    X, y, w, dX, dy, dw = _create_data('regression', output=output)
    # The score comparison is skipped for dataframe inputs.
    compare_scores = output != 'dataframe'

    dask_model = dlgbm.LGBMRegressor(
        time_out=5, local_listen_port=listen_port, seed=42
    )
    dask_model = dask_model.fit(dX, dy, client=client, sample_weight=dw)
    dask_pred = dask_model.predict(dX, client=client)
    if compare_scores:
        # Scored on the lazy predictions, before they are materialised.
        dask_score = r2_score(dy, dask_pred)
    dask_pred = dask_pred.compute()

    local_model = lightgbm.LGBMRegressor(seed=42)
    local_model.fit(X, y, sample_weight=w)
    local_score = local_model.score(X, y)
    local_pred = local_model.predict(X)

    # Scores should be the same.
    if compare_scores:
        assert_eq(dask_score, local_score, atol=.01)

    # Predictions should be roughly the same as the targets.
    assert_eq(y, dask_pred, rtol=1., atol=50.)
    assert_eq(y, local_pred, rtol=1., atol=50.)