def test_rf_concatenation_dask(client, model_type):
    """
    Check that the concatenated multi-GPU forest holds every tree.

    A distributed Random Forest (classifier when ``model_type`` is
    'classification', regressor otherwise) is fitted on a synthetic
    two-class dataset. The treelite handle of its internal model is then
    wrapped in a ``TreeliteModel`` and the tree count is compared with the
    requested number of estimators.
    """
    from cuml.fil.fil import TreeliteModel

    is_classifier = model_type == 'classification'
    n_estimators = 40

    X, y = make_classification(n_samples=1000, n_features=30,
                               random_state=123, n_classes=2)
    X = X.astype(np.float32)
    # Integer labels for classification, float targets for regression.
    y = y.astype(np.int32 if is_classifier else np.float32)

    X_df, y_df = _prep_training_data(client, X, y, partitions_per_worker=2)

    forest_cls = cuRFC_mg if is_classifier else cuRFR_mg
    cu_rf_mg = forest_cls(n_estimators=n_estimators)
    cu_rf_mg.fit(X_df, y_df)

    # Run a prediction and force its evaluation; the values themselves are
    # not inspected by this test.
    res1 = cu_rf_mg.predict(X_df)
    res1.compute()

    # take_handle_ownership=False: the handle is only borrowed from the
    # internal model here.
    handle = cu_rf_mg.internal_model._obtain_treelite_handle()
    local_tl = TreeliteModel.from_treelite_model_handle(
        handle, take_handle_ownership=False)

    assert local_tl.num_trees == n_estimators
def _obtain_treelite_model(treelite_handle):
    """
    Wrap a treelite handle in a Treelite model object.

    Parameters
    ----------
    treelite_handle : handle forwarded unchanged to
        ``tl.from_treelite_model_handle`` (presumably the handle obtained
        from a cuML Random Forest model; confirm against callers)

    Returns
    -------
    treelite_model : whatever ``tl.from_treelite_model_handle`` returns
        for the given handle
    """
    return tl.from_treelite_model_handle(treelite_handle)
def _concat_treelite_models(self):
    """
    Combine the Random Forest models held by the active workers into a
    single model.

    One ``_get_serialized_model`` task is scheduled for every entry of
    ``self.active_workers`` and all of them are computed synchronously
    through ``self.client``. Each result is converted to a treelite handle
    with ``_tl_handle_from_bytes`` and the full list of handles is passed
    to ``_concatenate_treelite_handle`` on the model taken from the last
    active worker. The temporary handles are always released afterwards.

    Returns
    -------
    model : the result of ``self.rfs[<last active worker>].result()``
        after ``_concatenate_treelite_handle`` has been called on it

    Raises
    ------
    RuntimeError
        If ``self.active_workers`` is empty, so there is no model to
        concatenate into.
    """
    workers = list(self.active_workers)
    if not workers:
        # Previously this surfaced as an UnboundLocalError on the leaked
        # loop variable; fail with an explicit message instead.
        raise RuntimeError(
            "Cannot concatenate treelite models: no active workers")

    serialized_futures = [
        dask.delayed(_get_serialized_model)(self.rfs[worker])
        for worker in workers
    ]
    mod_bytes = self.client.compute(serialized_futures, sync=True)

    # The model of the last active worker receives the concatenation.
    model = self.rfs[workers[-1]].result()

    # Handles are collected one at a time so that whatever was created is
    # released even if a later conversion or the concatenation fails.
    all_tl_mod_handles = []
    try:
        for worker_model_bytes in mod_bytes:
            all_tl_mod_handles.append(
                model._tl_handle_from_bytes(worker_model_bytes))
        model._concatenate_treelite_handle(all_tl_mod_handles)
    finally:
        for tl_handle in all_tl_mod_handles:
            TreeliteModel.free_treelite_model(tl_handle)

    return model