def testDistributedTile(self):
    """Tile ToDMatrix / XGBTrain under a mocked distributed context.

    Chunks of the tiled inputs are assigned alternately to two fake worker
    addresses; the test then expects both the DMatrix and the trained model
    to tile into exactly two chunks.
    """
    data, label, weight = [
        source.tiles() for source in (self.X, self.y, self.weight)
    ]

    workers = ['addr1:1', 'addr2:1']

    # Alternate the chunks of each input between the two workers.  Inputs are
    # visited as data, label, weight so that, on a key collision, the later
    # input's assignment wins.
    worker_by_chunk_key = {}
    for tiled in (data, label, weight):
        for position, chunk in enumerate(tiled.chunks):
            worker_by_chunk_key[chunk.key] = workers[position % 2]

    class MockDistributedDictContext(ContextBase):
        """Context stub that reports distributed mode and canned chunk metas."""

        @property
        def running_mode(self):
            return RunningMode.distributed

        def get_chunk_metas(self, chunk_keys):
            """Return one ChunkMeta per key; unknown keys get ``workers=None``."""

            def _meta_for(key):
                located = None
                if key in worker_by_chunk_key:
                    located = [worker_by_chunk_key[key]]
                return ChunkMeta(chunk_size=None, chunk_shape=None,
                                 workers=located)

            return [_meta_for(key) for key in chunk_keys]

    dmatrix = ToDMatrix(data=data, label=label, weight=weight)()
    model = XGBTrain(dtrain=dmatrix)()

    with MockDistributedDictContext():
        model = model.tiles()
        dmatrix = get_tiled(dmatrix)

        # 2 workers
        for tiled_result in (dmatrix, model):
            self.assertEqual(len(tiled_result.chunks), 2)
def testSerializeLocalTrain(self):
    """Round-trip tiled XGBTrain graphs through JSON inside a local context.

    Covers a DMatrix built from ``self.X`` / ``self.y`` and one built from
    ``self.X_df`` / ``self.y_series`` with dataframe output, then checks that
    feeding a multi-output ToDMatrix result back into ToDMatrix tiles to a
    single chunk.
    """

    def _round_trip_train_graph(dtrain):
        # Build the tiled training graph and make sure its JSON form can be
        # parsed back into a DAG.
        trained = XGBTrain(dtrain=dtrain)()
        tiled_graph = trained.build_graph(tiled=True)
        DAG.from_json(tiled_graph.to_json())

    session = new_session()

    with LocalContext(session._sess):
        _round_trip_train_graph(ToDMatrix(data=self.X, label=self.y)())

        _round_trip_train_graph(
            ToDMatrix(data=self.X_df, label=self.y_series,
                      output_types=[OutputType.dataframe])()
        )

        # Data chunked along columns: (1000, 10) with chunk_size=(1000, 5).
        column_chunked = mt.random.rand(1000, 10, chunk_size=(1000, 5))
        aligned_X, aligned_y = ToDMatrix(
            data=column_chunked, label=self.y, multi_output=True)()
        rebuilt = ToDMatrix(data=aligned_X, label=aligned_y)().tiles()

        self.assertEqual(len(rebuilt.chunks), 1)