def _testMatrixCreation(self, in_x, in_y, **kwargs):
    """Assert that a ``RayDMatrix`` built from the inputs yields the fixture data.

    Two scenarios are checked against ``self.x`` / ``self.y``:

    * a single actor (``rank=0, num_actors=1``) must receive everything;
    * with two actors, the shards of rank 0 and rank 1, joined in rank
      order, must reproduce the fixtures.

    Args:
        in_x: Feature input forwarded to ``RayDMatrix``.
        in_y: Label input forwarded to ``RayDMatrix``.
        **kwargs: Extra ``RayDMatrix`` keyword arguments. ``sharding`` is
            set to ``RayShardingMode.BATCH`` when the caller did not pass it.
    """
    kwargs.setdefault("sharding", RayShardingMode.BATCH)

    def _unpack(shard_params):
        # Data and label may arrive as lists of pieces; merge those through
        # ``concat_dataframes`` so they can be compared as a whole.
        features, labels = shard_params["data"], shard_params["label"]
        if isinstance(features, list):
            features = concat_dataframes(features)
        if isinstance(labels, list):
            labels = concat_dataframes(labels)
        return features, labels

    # Single actor check
    matrix = RayDMatrix(in_x, in_y, **kwargs)
    full_x, full_y = _unpack(matrix.get_data(rank=0, num_actors=1))

    self.assertTrue(np.allclose(self.x, full_x))
    self.assertTrue(np.allclose(self.y, full_y))

    # Multi actor check
    matrix = RayDMatrix(in_x, in_y, **kwargs)

    first_x, first_y = _unpack(matrix.get_data(rank=0, num_actors=2))

    # Unload before requesting the second rank's shard from the same matrix.
    matrix.unload_data()

    second_x, second_y = _unpack(matrix.get_data(rank=1, num_actors=2))

    joined_x = concat_dataframes([first_x, second_x])
    self.assertTrue(np.allclose(self.x, joined_x))
    joined_y = concat_dataframes([first_y, second_y])
    self.assertTrue(np.allclose(self.y, joined_y))
def _load_data(params): x = params["data"] y = params["label"] if isinstance(x, list): x = concat_dataframes(x) if isinstance(y, list): y = concat_dataframes(y) return x, y
def _testMatrixCreation(self, in_x, in_y, **kwargs):
    """Assert that a ``RayDMatrix`` built from the inputs yields the fixture data.

    The matrix is loaded for a single actor (``rank=0, num_actors=1``) and
    the resulting data and label are compared with ``self.x`` / ``self.y``.

    Args:
        in_x: Feature input forwarded to ``RayDMatrix``.
        in_y: Label input forwarded to ``RayDMatrix``.
        **kwargs: Extra keyword arguments forwarded to ``RayDMatrix``.
    """
    matrix = RayDMatrix(in_x, in_y, **kwargs)
    loaded = matrix.get_data(rank=0, num_actors=1)

    features, labels = loaded["data"], loaded["label"]

    # Either entry may be a list of pieces; merge those before comparing.
    features = (
        concat_dataframes(features) if isinstance(features, list) else features
    )
    labels = concat_dataframes(labels) if isinstance(labels, list) else labels

    self.assertTrue(np.allclose(self.x, features))
    self.assertTrue(np.allclose(self.y, labels))
def _get_dmatrix(data: RayDMatrix, param: Dict) -> xgb.DMatrix:
    """Build the local xgboost matrix for ``data`` from its loaded ``param``.

    Args:
        data: The matrix description. A ``RayDeviceQuantileDMatrix`` yields
            an ``xgb.DeviceQuantileDMatrix``; anything else yields a plain
            ``xgb.DMatrix``.
        param: Loaded fields for this matrix (``"data"``, ``"label"``,
            ``"weight"``, ``"base_margin"``, ``"label_lower_bound"``,
            ``"label_upper_bound"``). When ``param["data"]`` is a list the
            dict is updated in place: with the feature metadata in the
            device-quantile case, or with the merged shards otherwise.

    Returns:
        The constructed xgboost matrix.

    Raises:
        KeyError: If ``"data"`` is missing, or if ``param["data"]`` is a
            list and one of the other shard fields is missing.
    """
    if isinstance(data, RayDeviceQuantileDMatrix):
        if isinstance(param["data"], list):
            dm_param = {
                "feature_names": data.feature_names,
                "feature_types": data.feature_types,
                "missing": data.missing,
            }
            param.update(dm_param)
            # The shard list is handed to RayDataIter together with the
            # feature metadata instead of being merged up front.
            it = RayDataIter(**param)
            return xgb.DeviceQuantileDMatrix(it, **dm_param)
        return xgb.DeviceQuantileDMatrix(**param)

    if isinstance(param["data"], list):
        shard_fields = (
            "data",
            "label",
            "weight",
            "base_margin",
            "label_lower_bound",
            "label_upper_bound",
        )
        # Build the complete replacement first so a missing field leaves
        # ``param`` untouched.
        param.update(
            {field: concat_dataframes(param[field]) for field in shard_fields}
        )

    # The label bounds are applied through ``set_info`` rather than the
    # constructor. Read them without popping: ``param`` belongs to the caller
    # and may be passed in again, and a destructive pop would make every
    # later call silently lose the bounds.
    bound_keys = ("label_lower_bound", "label_upper_bound")
    lower_bound = param.get("label_lower_bound")
    upper_bound = param.get("label_upper_bound")
    dmatrix_kwargs = {
        key: value for key, value in param.items() if key not in bound_keys
    }

    matrix = xgb.DMatrix(**dmatrix_kwargs)
    matrix.set_info(label_lower_bound=lower_bound, label_upper_bound=upper_bound)
    return matrix