Example #1
def eval_fn(
    predictor_kwargs: Dict[str, Any] = None,
    model_ref: ObjectRef = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
):
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    try:
        initialize_pytorch(horovod=hvd)

        eval_shard = RayDatasetShard(
            rt.get_dataset_shard("eval"),
            features,
            training_set_metadata,
        )

        model = ray.get(model_ref)
        device = get_torch_device()
        model = model.to(device)

        predictor = RemotePredictor(model=model,
                                    horovod=hvd,
                                    report_tqdm_to_ray=True,
                                    **predictor_kwargs)
        return predictor.batch_evaluation(eval_shard, **kwargs)
    finally:
        torch.cuda.empty_cache()
        hvd.shutdown()
Example #2
def test_initialize_pytorch_with_gpu_int(mock_torch):
    mock_torch.cuda.is_available.return_value = True
    mock_torch.cuda.device_count.return_value = 4
    with clean_params():
        initialize_pytorch(gpus=1)
    mock_torch.cuda.set_device.assert_called_with(1)
    assert "CUDA_VISIBLE_DEVICES" not in os.environ
Example #3
def tune_learning_rate_fn(
    dataset: RayDataset,
    config: Dict[str, Any],
    data_loader_kwargs: Dict[str, Any] = None,
    executable_kwargs: Dict[str, Any] = None,
    model: ECD = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
) -> float:
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    try:
        initialize_pytorch(horovod=hvd)

        pipe = dataset.pipeline(shuffle=False, **data_loader_kwargs)
        train_shard = RayDatasetShard(
            pipe,
            features,
            training_set_metadata,
        )

        device = get_torch_device()
        model = model.to(device)

        trainer = RemoteTrainer(model=model, horovod=hvd, **executable_kwargs)
        return trainer.tune_learning_rate(config, train_shard, **kwargs)
    finally:
        torch.cuda.empty_cache()
        hvd.shutdown()
Example #4
def test_initialize_pytorch_with_gpu_list(mock_torch):
    # For test purposes, these devices can be anything; we just need to be able to uniquely
    # identify them.
    mock_torch.cuda.is_available.return_value = True
    mock_torch.cuda.device_count.return_value = 4
    with clean_params():
        initialize_pytorch(gpus=[1, 2])
    assert os.environ["CUDA_VISIBLE_DEVICES"] == "1,2"
Example #5
    def __init__(self, model: ECD, gpus=None, gpu_memory_limit=None, allow_parallel_threads=True, **kwargs):
        horovod = initialize_horovod()
        initialize_pytorch(
            gpus=gpus, gpu_memory_limit=gpu_memory_limit, allow_parallel_threads=allow_parallel_threads, horovod=horovod
        )
        super().__init__(model, horovod=horovod, **kwargs)

        # Only return results from rank 0 to reduce network overhead
        self.batch_predict = return_first(self.batch_predict)
        self.batch_evaluation = return_first(self.batch_evaluation)
Example #6
def test_initialize_pytorch_with_horovod_explicit_gpus(mock_torch):
    mock_torch.cuda.is_available.return_value = True
    mock_torch.cuda.device_count.return_value = 4

    mock_hvd = Mock()
    mock_hvd.local_rank.return_value = 1
    mock_hvd.local_size.return_value = 4

    with clean_params():
        initialize_pytorch(gpus="-1", horovod=mock_hvd)

    assert os.environ["CUDA_VISIBLE_DEVICES"] == ""
Example #7
def test_initialize_pytorch_with_horovod(mock_torch):
    mock_torch.cuda.is_available.return_value = True
    mock_torch.cuda.device_count.return_value = 4

    mock_hvd = Mock()
    mock_hvd.local_rank.return_value = 1
    mock_hvd.local_size.return_value = 4

    with clean_params():
        initialize_pytorch(horovod=mock_hvd)

    mock_torch.cuda.set_device.assert_called_with(1)
    assert "CUDA_VISIBLE_DEVICES" not in os.environ
Example #8
def train_fn(
    executable_kwargs: Dict[str, Any] = None,
    model: "LudwigModel" = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
):
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    initialize_pytorch(horovod=hvd)

    train_shard = RayDatasetShard(
        rt.get_dataset_shard("train"),
        features,
        training_set_metadata,
    )

    try:
        val_shard = rt.get_dataset_shard("val")
    except KeyError:
        val_shard = None

    if val_shard is not None:
        val_shard = RayDatasetShard(
            val_shard,
            features,
            training_set_metadata,
        )

    try:
        test_shard = rt.get_dataset_shard("test")
    except KeyError:
        test_shard = None

    if test_shard is not None:
        test_shard = RayDatasetShard(
            test_shard,
            features,
            training_set_metadata,
        )

    trainer = RemoteTrainer(model=model, **executable_kwargs)
    results = trainer.train(train_shard, val_shard, test_shard, **kwargs)

    # TODO(shreya): Figure out GPU memory leak
    # TODO(shreya): Check if placing model off GPU explicitly makes a difference
    # Clear CUDA memory, place model on CPU, return model to user
    # torch.cuda.empty_cache()
    # model.cpu()

    return results, trainer.validation_field, trainer.validation_metric
Example #9
def test_initialize_pytorch_with_horovod_bad_local_rank(mock_torch, mock_warnings):
    """In this scenario, the local_size 5 is out of the bounds of the GPU indices."""
    mock_torch.cuda.is_available.return_value = True
    mock_torch.cuda.device_count.return_value = 4

    mock_hvd = Mock()
    mock_hvd.local_rank.return_value = 1
    mock_hvd.local_size.return_value = 5

    with clean_params():
        initialize_pytorch(horovod=mock_hvd)

    assert os.environ["CUDA_VISIBLE_DEVICES"] == ""
    mock_warnings.warn.assert_called()
Example #10
def test_initialize_pytorch_only_once(mock_torch):
    mock_torch.cuda.is_available.return_value = True
    mock_torch.cuda.device_count.return_value = 4
    with clean_params():
        # During first time initialization, set pytorch parallelism
        initialize_pytorch(allow_parallel_threads=False)
        mock_torch.set_num_threads.assert_called_once()
        mock_torch.set_num_interop_threads.assert_called_once()

        # Reset call counts on all threading calls
        mock_torch.reset_mock()

        # In the second call to initialization, avoid calling these methods again, as pytorch
        # will raise an exception
        initialize_pytorch(allow_parallel_threads=False)
        mock_torch.set_num_threads.assert_not_called()
        mock_torch.set_num_interop_threads.assert_not_called()

    # No GPUs were specified, so this should not have been called even once
    mock_torch.cuda.memory.set_per_process_memory_fraction.assert_not_called()
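Example #10 depends on initialize_pytorch applying its threading settings only on the first call, since PyTorch raises if interop threads are reconfigured after parallel work has started. One common way to implement that guard is a module-level flag; the sketch below illustrates the pattern and is not Ludwig's actual implementation.

import torch

_TORCH_PARALLELISM_INITIALIZED = False  # hypothetical module-level guard


def _set_torch_parallelism(allow_parallel_threads: bool) -> None:
    global _TORCH_PARALLELISM_INITIALIZED
    if _TORCH_PARALLELISM_INITIALIZED:
        # Threading was already configured; a second call must not touch it again.
        return
    if not allow_parallel_threads:
        # Restrict both intra-op and inter-op parallelism to a single thread.
        torch.set_num_threads(1)
        torch.set_num_interop_threads(1)
    _TORCH_PARALLELISM_INITIALIZED = True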
Example #11
File: ray.py  Project: yarenty/ludwig
def legacy_train_fn(
    trainer: RemoteTrainer = None,
    remote_model: "LudwigModel" = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    train_shards: List[DatasetPipeline] = None,
    val_shards: List[DatasetPipeline] = None,
    test_shards: List[DatasetPipeline] = None,
    **kwargs,
):
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    initialize_pytorch(horovod=hvd)

    train_shard = RayDatasetShard(
        train_shards[hvd.rank()],
        features,
        training_set_metadata,
    )

    val_shard = val_shards[hvd.rank()] if val_shards else None
    if val_shard is not None:
        val_shard = RayDatasetShard(
            val_shard,
            features,
            training_set_metadata,
        )

    test_shard = test_shards[hvd.rank()] if test_shards else None
    if test_shard is not None:
        test_shard = RayDatasetShard(
            test_shard,
            features,
            training_set_metadata,
        )

    results = trainer.train(train_shard, val_shard, test_shard, **kwargs)
    return results
Example #12
 def initialize_pytorch(self, *args, **kwargs):
     initialize_pytorch(*args, horovod=self._horovod, **kwargs)
Example #13
def memory_tune_config(config, dataset, model_category, row_count):
    fits_in_memory = False
    tried_reduce_seq_len = False
    raw_config = merge_with_defaults(config)
    training_set_metadata = get_trainingset_metadata(raw_config, dataset)
    modified_hyperparam_search_space = copy.deepcopy(
        raw_config[HYPEROPT]["parameters"])
    current_param_values = {}
    param_list = []
    model_type = get_model_type(raw_config)
    if model_type in RANKED_MODIFIABLE_PARAM_LIST:
        params_to_modify = RANKED_MODIFIABLE_PARAM_LIST[model_type]
        if len(params_to_modify.keys()) > 0:
            param_list = list(params_to_modify.keys())
            max_memory = _get_machine_memory()
            initialize_pytorch()

    while param_list:
        # compute memory utilization
        current_param_values = get_new_params(
            current_param_values, modified_hyperparam_search_space,
            params_to_modify)
        temp_config = sub_new_params(raw_config, current_param_values)
        mem_use = compute_memory_usage(temp_config, training_set_metadata,
                                       model_category)
        if mem_use > max_memory and model_category == TEXT and not tried_reduce_seq_len:
            tried_reduce_seq_len = True
            if reduce_text_feature_max_length(config, training_set_metadata):
                reduce_text_feature_max_length(temp_config,
                                               training_set_metadata)
                mem_use = compute_memory_usage(temp_config,
                                               training_set_metadata,
                                               model_category)
        logging.info(
            f"Checking model estimated mem use {mem_use} against memory size {max_memory}"
        )
        if mem_use <= max_memory:
            fits_in_memory = True
            break
        # check if we have exhausted tuning of current param (e.g. we can no longer reduce the param value)
        param, min_value = param_list[0], params_to_modify[param_list[0]]

        if param in modified_hyperparam_search_space.keys():
            param_space = modified_hyperparam_search_space[param]["space"]
            if param_space == "choice":
                categories = modified_hyperparam_search_space[param]["categories"]
                if len(categories) >= 2 and categories[-2] >= min_value:
                    modified_hyperparam_search_space[param]["categories"] = categories[:-1]
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
            else:
                # reduce by 10%
                upper_bound = modified_hyperparam_search_space[param]["upper"]
                lower_bound = modified_hyperparam_search_space[param]["lower"]
                reduction_val = (upper_bound - lower_bound) * 0.1
                new_upper_bound = upper_bound - reduction_val
                if new_upper_bound > lower_bound and new_upper_bound > min_value:
                    modified_hyperparam_search_space[param]["upper"] = new_upper_bound
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
        else:
            param_list.pop(0)  # param not in hyperopt search space

    if model_category == TEXT and row_count > AUTOML_LARGE_TEXT_DATASET:
        if "checkpoints_per_epoch" not in config[
                TRAINER] and "steps_per_checkpoint" not in config[TRAINER]:
            checkpoints_per_epoch = max(
                2, math.floor(row_count / AUTOML_MAX_ROWS_PER_CHECKPOINT))
            config[TRAINER][
                "checkpoints_per_epoch"] = checkpoints_per_epoch  # decrease latency to get model accuracy signal
        if "evaluate_training_set" not in config[TRAINER]:
            config[TRAINER][
                "evaluate_training_set"] = False  # reduce overhead for increased evaluation frequency
        if not fits_in_memory:
            # Switch to smaller pre-trained model encoder for large datasets.
            _update_text_encoder(config["input_features"],
                                 AUTOML_DEFAULT_TEXT_ENCODER,
                                 AUTOML_SMALLER_TEXT_ENCODER)

    modified_config = copy.deepcopy(config)

    modified_config[HYPEROPT]["parameters"] = modified_hyperparam_search_space
    modified_config[HYPEROPT]["executor"]["num_samples"] = _update_num_samples(
        modified_config[HYPEROPT]["executor"]["num_samples"],
        modified_hyperparam_search_space)
    return modified_config, fits_in_memory
Example #14
def test_initialize_pytorch_without_gpu(mock_torch):
    mock_torch.cuda.is_available.return_value = True
    mock_torch.cuda.device_count.return_value = 4
    with clean_params():
        initialize_pytorch(gpus=-1)
    assert os.environ["CUDA_VISIBLE_DEVICES"] == ""
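Read together, Examples #2, #4, #6, and #14 pin down how the gpus argument is expected to map onto device pinning: a single index selects a device, a list masks visibility, and -1 hides every GPU. The sketch below summarizes the contract those tests encode; it is an illustration under those assumptions, not Ludwig's actual implementation.

import os
from typing import List, Optional, Union

import torch


def _pin_gpus(gpus: Optional[Union[int, str, List[int]]]) -> None:
    # Mapping implied by the tests above (assumed, for illustration only).
    if gpus is None:
        return  # leave device visibility untouched
    if isinstance(gpus, str):
        gpus = [int(g) for g in gpus.split(",")]
    if isinstance(gpus, int):
        gpus = [gpus]
    if gpus == [-1]:
        # gpus=-1 (or "-1") means "no GPUs": hide every device from torch.
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    elif len(gpus) == 1:
        # A single index pins the current device without masking visibility.
        torch.cuda.set_device(gpus[0])
    else:
        # A list of indices restricts visibility to exactly those devices.
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(g) for g in gpus)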
Example #15
File: ray.py  Project: yarenty/ludwig
def train_fn(
    executable_kwargs: Dict[str, Any] = None,
    model_ref: ObjectRef = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
):
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    try:
        initialize_pytorch(horovod=hvd)

        train_shard = RayDatasetShard(
            rt.get_dataset_shard("train"),
            features,
            training_set_metadata,
        )

        try:
            val_shard = rt.get_dataset_shard("val")
        except KeyError:
            val_shard = None

        if val_shard is not None:
            val_shard = RayDatasetShard(
                val_shard,
                features,
                training_set_metadata,
            )

        try:
            test_shard = rt.get_dataset_shard("test")
        except KeyError:
            test_shard = None

        if test_shard is not None:
            test_shard = RayDatasetShard(
                test_shard,
                features,
                training_set_metadata,
            )

        model = ray.get(model_ref)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)

        trainer = RemoteTrainer(model=model, horovod=hvd, **executable_kwargs)
        results = trainer.train(train_shard, val_shard, test_shard, **kwargs)

        if results is not None:
            # only return the model state dict back to the head node.
            trained_model, *args = results
            results = (trained_model.cpu().state_dict(), *args)

        torch.cuda.empty_cache()

        train_results = results, trainer.validation_field, trainer.validation_metric

    finally:
        hvd.shutdown()
    return train_results
Example #16
File: ray.py  Project: yarenty/ludwig
 def initialize_pytorch(self, **kwargs):
     # Make sure we don't claim any GPU resources on the head node
     initialize_pytorch(gpus=-1)
     self._pytorch_kwargs = kwargs
Example #17
def memory_tune_config(config, dataset, model_category, row_count):
    fits_in_memory = False
    raw_config = merge_with_defaults(config)
    training_set_metadata = get_trainingset_metadata(raw_config, dataset)
    modified_hyperparam_search_space = copy.deepcopy(
        raw_config[HYPEROPT]["parameters"])
    current_param_values = {}
    param_list = []
    model_type = get_model_type(raw_config)
    if model_type in RANKED_MODIFIABLE_PARAM_LIST:
        params_to_modify = RANKED_MODIFIABLE_PARAM_LIST[model_type]
        if len(params_to_modify.keys()) > 0:
            param_list = list(params_to_modify.keys())
            max_memory = _get_machine_memory()
            initialize_pytorch()

    while param_list:
        # compute memory utilization
        current_param_values = get_new_params(
            current_param_values, modified_hyperparam_search_space,
            params_to_modify)
        temp_config = sub_new_params(raw_config, current_param_values)
        mem_use = compute_memory_usage(temp_config, training_set_metadata)
        logging.info(
            f"Checking model estimated mem use {mem_use} against memory size {max_memory}"
        )
        if mem_use <= max_memory:
            fits_in_memory = True
            break
        # check if we have exhausted tuning of current param (e.g. we can no longer reduce the param value)
        param, min_value = param_list[0], params_to_modify[param_list[0]]

        if param in modified_hyperparam_search_space.keys():
            param_space = modified_hyperparam_search_space[param]["space"]
            if param_space == "choice":
                categories = modified_hyperparam_search_space[param]["categories"]
                if len(categories) >= 2 and categories[-2] >= min_value:
                    modified_hyperparam_search_space[param]["categories"] = categories[:-1]
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
            else:
                # reduce by 10%
                upper_bound = modified_hyperparam_search_space[param]["upper"]
                lower_bound = modified_hyperparam_search_space[param]["lower"]
                reduction_val = (upper_bound - lower_bound) * 0.1
                new_upper_bound = upper_bound - reduction_val
                if new_upper_bound > lower_bound and new_upper_bound > min_value:
                    modified_hyperparam_search_space[param]["upper"] = new_upper_bound
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
        else:
            param_list.pop(0)  # param not in hyperopt search space

    if not fits_in_memory and model_category == TEXT:
        reduce_text_model_mem(config, training_set_metadata, row_count)

    modified_config = copy.deepcopy(config)

    modified_config[HYPEROPT]["parameters"] = modified_hyperparam_search_space
    return modified_config, fits_in_memory
Example #18
 def initialize_pytorch(self, *args, **kwargs):
     initialize_pytorch(*args, **kwargs)