# Example 1
def train_ray(num_workers, num_boost_rounds, num_files=0, use_gpu=False):
    """Benchmark xgboost_ray training on the fixed /data/parted.parquet set.

    Trains for ``num_boost_rounds`` rounds across ``num_workers`` actors,
    saves the booster to ``benchmark_{cpu,gpu}.xgb``, and returns the
    wall-clock training time in seconds. When ``num_files`` is non-zero,
    exactly that many parquet shards are used (the listing is repeated if
    fewer exist on disk).
    """
    path = "/data/parted.parquet"

    if num_files:
        files = sorted(glob.glob(f"{path}/**/*.parquet"))
        # Repeat the listing until it is long enough to slice.
        while len(files) < num_files:
            files = files + files
        path = files[:num_files]

    # A device (GPU) quantile matrix is only usable when cupy imports.
    use_device_matrix = False
    if use_gpu:
        try:
            import cupy  # noqa: F401
        except ImportError:
            use_device_matrix = False
        else:
            use_device_matrix = True

    matrix_cls = RayDeviceQuantileDMatrix if use_device_matrix else RayDMatrix
    dtrain = matrix_cls(
        path,
        num_actors=num_workers,
        label="labels",
        ignore=["partition"],
        filetype=RayFileType.PARQUET)

    config = {
        "tree_method": "gpu_hist" if use_gpu else "hist",
        "eval_metric": ["logloss", "error"],
    }

    gpus_per_actor = 1 if use_gpu else 0
    evals_result = {}
    start = time.time()
    bst = train(
        config,
        dtrain,
        evals_result=evals_result,
        max_actor_restarts=2,
        num_boost_round=num_boost_rounds,
        num_actors=num_workers,
        cpus_per_actor=4,
        checkpoint_path="/tmp/checkpoint/",
        gpus_per_actor=gpus_per_actor,
        resources_per_actor={
            "actor_cpus": 4,
            "actor_gpus": gpus_per_actor
        },
        evals=[(dtrain, "train")])
    taken = time.time() - start
    print(f"TRAIN TIME TAKEN: {taken:.2f} seconds")

    bst.save_model("benchmark_{}.xgb".format("gpu" if use_gpu else "cpu"))
    print("Final training error: {:.4f}".format(
        evals_result["train"]["error"][-1]))
    return taken
# Example 2
def train_ray(path,
              num_workers,
              num_boost_rounds,
              num_files=0,
              regression=False,
              use_gpu=False,
              ray_params=None,
              xgboost_params=None,
              **kwargs):
    """Benchmark distributed xgboost_ray training on a parquet dataset.

    Args:
        path: Directory containing parquet shards (searched recursively).
        num_workers: Number of Ray actors used for training.
        num_boost_rounds: Number of boosting rounds.
        num_files: If non-zero, train on exactly this many files; the file
            list is repeated until long enough, then sliced.
        regression: If True use a squared-error objective, otherwise binary
            logistic classification.
        use_gpu: If True train with ``gpu_hist`` and one GPU per actor.
        ray_params: Optional ``RayParams``; when None, defaults below apply.
        xgboost_params: Extra entries merged into the xgboost config.
        **kwargs: Forwarded verbatim to ``xgboost_ray.train``.

    Returns:
        Tuple ``(booster, additional_results, seconds_taken)``.

    Raises:
        ValueError: If ``path`` does not exist.
    """
    if not os.path.exists(path):
        raise ValueError(f"Path does not exist: {path}")

    if num_files:
        files = sorted(glob.glob(f"{path}/**/*.parquet"))
        # Duplicate the list until it can be sliced to num_files entries.
        while num_files > len(files):
            files = files + files
        path = files[0:num_files]

    # Device (GPU) matrices need cupy; fall back to the host matrix.
    use_device_matrix = False
    if use_gpu:
        try:
            import cupy  # noqa: F401
            use_device_matrix = True
        except ImportError:
            use_device_matrix = False

    if use_device_matrix:
        dtrain = RayDeviceQuantileDMatrix(
            path,
            num_actors=num_workers,
            label="labels",
            ignore=["partition"],
            filetype=RayFileType.PARQUET)
    else:
        dtrain = RayDMatrix(
            path,
            num_actors=num_workers,
            label="labels",
            ignore=["partition"],
            filetype=RayFileType.PARQUET)

    config = {"tree_method": "hist" if not use_gpu else "gpu_hist"}

    if not regression:
        # Classification
        config.update({
            "objective": "binary:logistic",
            "eval_metric": ["logloss", "error"],
        })
    else:
        # Regression
        config.update({
            "objective": "reg:squarederror",
            "eval_metric": ["logloss", "rmse"],
        })

    if xgboost_params:
        config.update(xgboost_params)

    start = time.time()
    evals_result = {}
    additional_results = {}
    bst = train(
        config,
        dtrain,
        evals_result=evals_result,
        additional_results=additional_results,
        num_boost_round=num_boost_rounds,
        ray_params=ray_params or RayParams(
            max_actor_restarts=2,
            num_actors=num_workers,
            cpus_per_actor=1,
            # Fixed: was "1 if not use_gpu else 1", which always requested
            # a GPU per actor even for CPU-only runs.
            gpus_per_actor=0 if not use_gpu else 1),
        evals=[(dtrain, "train")],
        **kwargs)
    taken = time.time() - start
    print(f"TRAIN TIME TAKEN: {taken:.2f} seconds")

    bst.save_model("benchmark_{}.xgb".format("cpu" if not use_gpu else "gpu"))
    # Fixed: "error" is only recorded for classification; regression runs
    # record "rmse" and previously raised a KeyError here.
    final_metric = "error" if not regression else "rmse"
    print("Final training error: {:.4f}".format(
        evals_result["train"][final_metric][-1]))
    return bst, additional_results, taken
# Example 3
def train_ray(train_files,
              eval_files,
              num_workers,
              num_boost_round,
              regression=False,
              use_gpu=False,
              ray_params=None,
              xgboost_params=None,
              ft_manager=None,
              aws=None,
              **kwargs):
    """Train an xgboost_ray model with separate train and eval matrices.

    Args:
        train_files: Parquet files backing the training matrix.
        eval_files: Parquet files backing the evaluation matrix.
        num_workers: Number of Ray actors.
        num_boost_round: Number of boosting rounds.
        regression: If True use squared-error regression, otherwise binary
            logistic classification.
        use_gpu: If True train with ``gpu_hist`` and one GPU per actor.
        ray_params: Optional ``RayParams``; note its ``num_actors``,
            ``gpus_per_actor`` and ``distributed_callbacks`` fields are
            overwritten below.
        xgboost_params: Base xgboost config (copied, never mutated).
        ft_manager: Optional fault-tolerance manager; wires up the delayed
            loading and die callbacks when provided.
        aws: Optional dict of AWS env vars applied to the driver and, via a
            callback, to every actor.
        **kwargs: Forwarded verbatim to ``xgboost_ray.train``.

    Returns:
        Tuple ``(booster, results)`` where ``results`` holds final
        train/eval metrics and the total row count.
    """
    use_device_matrix = False
    if use_gpu:
        try:
            import cupy  # noqa: F401
            use_device_matrix = True
        except ImportError:
            use_device_matrix = False

    if use_gpu and use_device_matrix:
        dtrain = RayDeviceQuantileDMatrix(train_files,
                                          num_actors=num_workers,
                                          label="labels",
                                          ignore=["partition"],
                                          filetype=RayFileType.PARQUET)
        deval = RayDeviceQuantileDMatrix(eval_files,
                                         num_actors=num_workers,
                                         label="labels",
                                         ignore=["partition"],
                                         filetype=RayFileType.PARQUET)
    else:
        dtrain = RayDMatrix(train_files,
                            num_actors=num_workers,
                            label="labels",
                            ignore=["partition"],
                            filetype=RayFileType.PARQUET)
        deval = RayDMatrix(eval_files,
                           num_actors=num_workers,
                           label="labels",
                           ignore=["partition"],
                           filetype=RayFileType.PARQUET)

    # Fixed: previously config aliased the caller's xgboost_params dict, so
    # the config.update(...) calls below mutated the caller's argument and
    # leaked objective/eval_metric entries across calls. Copy instead.
    config = dict(xgboost_params) if xgboost_params else {
        "tree_method": "hist"
    }

    if use_gpu:
        config.update({"tree_method": "gpu_hist"})

    if not regression:
        # Classification
        config.update({
            "objective": "binary:logistic",
            "eval_metric": ["logloss", "error"],
        })
        return_metric = "error"
    else:
        # Regression
        config.update({
            "objective": "reg:squarederror",
            "eval_metric": ["logloss", "rmse"],
        })
        return_metric = "rmse"

    xgboost_callbacks = []
    distributed_callbacks = []
    if ft_manager:
        delay_callback = DelayedLoadingCallback(ft_manager,
                                                reload_data=True,
                                                sleep_time=0.1)
        distributed_callbacks.append(delay_callback)

        die_callback = DieCallback(ft_manager, training_delay=0.1)
        xgboost_callbacks.append(die_callback)

    if aws:
        aws_callback = EnvironmentCallback(aws)
        distributed_callbacks.append(aws_callback)
        os.environ.update(aws)

    # NOTE(review): this intentionally overrides fields on a caller-supplied
    # RayParams object as well.
    ray_params = ray_params or RayParams()
    ray_params.num_actors = num_workers
    ray_params.gpus_per_actor = 0 if not use_gpu else 1
    ray_params.distributed_callbacks = distributed_callbacks

    evals_result = {}
    additional_results = {}
    bst = train(config,
                dtrain,
                evals_result=evals_result,
                additional_results=additional_results,
                num_boost_round=num_boost_round,
                ray_params=ray_params,
                evals=[(dtrain, "train"), (deval, "eval")],
                callbacks=xgboost_callbacks,
                **kwargs)

    bst.save_model("benchmark_{}.xgb".format("cpu" if not use_gpu else "gpu"))
    print("Final training error: {:.4f}".format(
        evals_result["train"][return_metric][-1]))

    results = {
        "train-logloss": evals_result["train"]["logloss"][-1],
        f"train-{return_metric}": evals_result["train"][return_metric][-1],
        "eval-logloss": evals_result["eval"]["logloss"][-1],
        f"eval-{return_metric}": evals_result["eval"][return_metric][-1],
        "total_n": additional_results["total_n"]
    }

    return bst, results
# Example 4
def train_ray(
    path,
    num_workers,
    num_boost_rounds,
    num_files=0,
    regression=False,
    use_gpu=False,
    ray_params=None,
    xgboost_params=None,
    **kwargs,
):
    """Benchmark distributed xgboost_ray training on parquet data.

    Args:
        path: A directory (resolved via ``get_parquet_files``) or an
            explicit list of parquet files.
        num_workers: Number of Ray actors used for training.
        num_boost_rounds: Number of boosting rounds.
        num_files: Passed to ``get_parquet_files`` when ``path`` is a
            directory.
        regression: If True use a squared-error objective, otherwise binary
            logistic classification.
        use_gpu: If True train with ``gpu_hist`` and one GPU per actor.
        ray_params: Optional ``RayParams``; when None, defaults below apply.
        xgboost_params: Extra entries merged into the xgboost config.
        **kwargs: Forwarded verbatim to ``xgboost_ray.train``.

    Returns:
        Tuple ``(booster, additional_results, seconds_taken)``.
    """
    if not isinstance(path, list):
        path = get_parquet_files(path, num_files=num_files)

    # Device (GPU) matrices need cupy; fall back to the host matrix.
    use_device_matrix = False
    if use_gpu:
        try:
            import cupy  # noqa: F401

            use_device_matrix = True
        except ImportError:
            use_device_matrix = False

    if use_device_matrix:
        dtrain = RayDeviceQuantileDMatrix(
            path,
            num_actors=num_workers,
            label="labels",
            ignore=["partition"],
            filetype=RayFileType.PARQUET,
        )
    else:
        dtrain = RayDMatrix(
            path,
            num_actors=num_workers,
            label="labels",
            ignore=["partition"],
            filetype=RayFileType.PARQUET,
        )

    config = {"tree_method": "hist" if not use_gpu else "gpu_hist"}

    if not regression:
        # Classification
        config.update(
            {
                "objective": "binary:logistic",
                "eval_metric": ["logloss", "error"],
            }
        )
    else:
        # Regression
        config.update(
            {
                "objective": "reg:squarederror",
                "eval_metric": ["logloss", "rmse"],
            }
        )

    if xgboost_params:
        config.update(xgboost_params)

    start = time.time()
    evals_result = {}
    additional_results = {}
    bst = train(
        config,
        dtrain,
        evals_result=evals_result,
        additional_results=additional_results,
        num_boost_round=num_boost_rounds,
        ray_params=ray_params
        or RayParams(
            max_actor_restarts=2,
            num_actors=num_workers,
            cpus_per_actor=1,
            # Fixed: was "1 if not use_gpu else 1", which always requested
            # a GPU per actor even for CPU-only runs.
            gpus_per_actor=0 if not use_gpu else 1,
        ),
        evals=[(dtrain, "train")],
        **kwargs,
    )
    taken = time.time() - start
    print(f"TRAIN TIME TAKEN: {taken:.2f} seconds")

    out_file = os.path.expanduser(
        "~/benchmark_{}.xgb".format("cpu" if not use_gpu else "gpu")
    )
    bst.save_model(out_file)

    # Fixed: "error" is only recorded for classification; regression runs
    # record "rmse" and previously raised a KeyError here.
    final_metric = "error" if not regression else "rmse"
    print(
        "Final training error: {:.4f}".format(
            evals_result["train"][final_metric][-1]
        )
    )
    return bst, additional_results, taken