def eval_fn(
    predictor_kwargs: Dict[str, Any] = None,
    model_ref: ObjectRef = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
):
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    try:
        initialize_pytorch(horovod=hvd)

        eval_shard = RayDatasetShard(
            rt.get_dataset_shard("eval"),
            features,
            training_set_metadata,
        )

        model = ray.get(model_ref)
        device = get_torch_device()
        model = model.to(device)

        predictor = RemotePredictor(model=model, horovod=hvd, report_tqdm_to_ray=True, **predictor_kwargs)
        return predictor.batch_evaluation(eval_shard, **kwargs)
    finally:
        torch.cuda.empty_cache()
        hvd.shutdown()
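# Hedged sketch (illustrative driver code, not from the module above): the
# model_ref consumed by eval_fn is presumably created on the head node with
# ray.put, so each worker fetches one shared copy from the Ray object store
# instead of receiving its own serialized model per task. `model` is assumed
# to be in scope on the head node.
import ray

model_ref = ray.put(model)   # store the model once in the object store
# each worker then recovers it inside eval_fn via:
model = ray.get(model_ref)   # served from the local object store when a copy is already on the node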
def tune_learning_rate_fn(
    dataset: RayDataset,
    config: Dict[str, Any],
    data_loader_kwargs: Dict[str, Any] = None,
    executable_kwargs: Dict[str, Any] = None,
    model: ECD = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
) -> float:
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    try:
        initialize_pytorch(horovod=hvd)

        pipe = dataset.pipeline(shuffle=False, **data_loader_kwargs)
        train_shard = RayDatasetShard(
            pipe,
            features,
            training_set_metadata,
        )

        device = get_torch_device()
        model = model.to(device)

        trainer = RemoteTrainer(model=model, horovod=hvd, **executable_kwargs)
        return trainer.tune_learning_rate(config, train_shard, **kwargs)
    finally:
        torch.cuda.empty_cache()
        hvd.shutdown()
def load(self, save_path):
    """Loads the model from the given path."""
    weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
    self.lgb_booster = lgb.Booster(model_file=weights_save_path)
    self.compile()

    device = torch.device(get_torch_device())
    self.compiled_model.to(device)
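# Hedged sketch (illustrative, not Ludwig's save code) of the round trip that
# load() relies on: lgb.Booster(model_file=...) reads a booster previously
# written in LightGBM's text format by Booster.save_model at the same path.
import lightgbm as lgb
import numpy as np

train_data = lgb.Dataset(np.random.rand(20, 3), label=np.random.randint(0, 2, 20))
booster = lgb.train({"objective": "binary", "verbose": -1}, train_data, num_boost_round=2)
booster.save_model("model_weights.txt")                  # hypothetical save counterpart
restored = lgb.Booster(model_file="model_weights.txt")   # what load() does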
def __init__(self):
    model = ray.get(model_ref)
    device = get_torch_device()
    self.model = model.to(device)

    self.output_columns = output_columns
    self.features = features
    self.training_set_metadata = training_set_metadata
    self.reshape_map = {
        f[PROC_COLUMN]: training_set_metadata[f[NAME]].get("reshape") for f in features.values()
    }

    predictor = Predictor(model, **predictor_kwargs)
    self.predict = partial(predictor.predict_single, *args, **kwargs)
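# Hedged sketch: model_ref, output_columns, features, training_set_metadata,
# predictor_kwargs, args, and kwargs are not defined inside __init__, so the
# class is presumably built inside a factory that closes over them. The factory
# and class names below are hypothetical:
def make_batch_infer_model(model_ref, predictor_kwargs, output_columns, features, training_set_metadata, *args, **kwargs):
    class BatchInferModel:
        def __init__(self):
            ...  # the __init__ body above, capturing the factory's arguments

    return BatchInferModel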
from typing import List

import pytest
import torch

from ludwig.modules.embedding_modules import Embed, EmbedSequence, EmbedSet, EmbedWeighted, TokenAndPositionEmbedding
from ludwig.utils.torch_utils import get_torch_device

DEVICE = get_torch_device()


@pytest.mark.parametrize("vocab", [["a", "b", "c"]])
@pytest.mark.parametrize("embedding_size", [2])
@pytest.mark.parametrize("representation", ["dense", "sparse"])
def test_embed(
    vocab: List[str],
    embedding_size: int,
    representation: str,
):
    embed = Embed(
        vocab=vocab,
        embedding_size=embedding_size,
        representation=representation,
    ).to(DEVICE)
    inputs = torch.randint(0, 2, size=(2, 1)).bool().to(DEVICE)
    outputs = embed(inputs)
    assert outputs.shape[1:] == embed.output_shape


@pytest.mark.parametrize("vocab", [["a", "b", "c", "d"]])
@pytest.mark.parametrize("embedding_size", [3])
def load(self, save_path):
    """Loads the model from the given path."""
    weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
    device = torch.device(get_torch_device())
    self.load_state_dict(torch.load(weights_save_path, map_location=device))
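# Hedged sketch of the matching save method (illustrative; the real counterpart
# may differ): the state dict round-trips through torch.save / torch.load, and
# map_location above remaps the saved tensors onto the current device on load.
def save(self, save_path):
    """Saves the model weights to the given path."""
    weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
    torch.save(self.state_dict(), weights_save_path)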
def train_fn(
    executable_kwargs: Dict[str, Any] = None,
    model_ref: ObjectRef = None,  # noqa: F821
    training_set_metadata: Dict[str, Any] = None,
    features: Dict[str, Dict] = None,
    **kwargs,
):
    # Pin GPU before loading the model to prevent memory leaking onto other devices
    hvd = initialize_horovod()
    try:
        initialize_pytorch(horovod=hvd)

        train_shard = RayDatasetShard(
            rt.get_dataset_shard("train"),
            features,
            training_set_metadata,
        )

        try:
            val_shard = rt.get_dataset_shard("val")
        except KeyError:
            val_shard = None

        if val_shard is not None:
            val_shard = RayDatasetShard(
                val_shard,
                features,
                training_set_metadata,
            )

        try:
            test_shard = rt.get_dataset_shard("test")
        except KeyError:
            test_shard = None

        if test_shard is not None:
            test_shard = RayDatasetShard(
                test_shard,
                features,
                training_set_metadata,
            )

        model = ray.get(model_ref)
        device = get_torch_device()
        model = model.to(device)

        trainer = RemoteTrainer(model=model, horovod=hvd, report_tqdm_to_ray=True, **executable_kwargs)
        results = trainer.train(train_shard, val_shard, test_shard, **kwargs)

        if results is not None:
            # only return the model state dict back to the head node.
            trained_model, *args = results
            results = (trained_model.cpu().state_dict(), *args)

        torch.cuda.empty_cache()

        train_results = results, trainer.validation_field, trainer.validation_metric

    finally:
        hvd.shutdown()

    return train_results
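# Hedged sketch of how the head node might consume train_fn's return value
# (the driver names are illustrative): the CPU state dict comes first in
# results, so the trained weights can be restored without shipping CUDA
# tensors across the cluster.
results, validation_field, validation_metric = train_results
if results is not None:
    state_dict, *train_stats = results
    model.load_state_dict(state_dict)  # restore trained weights on the head node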
import torch
import torch.nn as nn

from ludwig.modules.initializer_modules import get_initializer
from ludwig.utils.torch_utils import get_torch_device

DEVICE = "cuda:0" if get_torch_device() == "cuda" else "cpu"


def test_get_initializer():
    """Currently only checks the default-parameter case."""
    tensor_size = (2, 3)

    # Test for when the parameters are default
    torch.random.manual_seed(0)
    initialized_tensor = get_initializer("xavier_uniform")(*tensor_size, device=DEVICE)

    # Check that a tensor using the expected initialization and the same seed is identical
    default_initializer = nn.init.xavier_uniform_
    torch.random.manual_seed(0)
    default_tensor = default_initializer(torch.empty(*tensor_size, device=DEVICE))
    assert torch.equal(initialized_tensor, default_tensor)
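# Hedged sketch of the contract the test above pins down (not necessarily
# Ludwig's actual implementation): get_initializer(name) returns a callable
# that allocates an empty tensor and fills it in place with the matching
# nn.init routine, consuming the same RNG stream as calling nn.init directly.
# Reuses the torch / nn imports from the test module above.
def get_initializer_sketch(name: str):
    init_fns = {"xavier_uniform": nn.init.xavier_uniform_}
    init_fn = init_fns[name]

    def initialize(*size, device=None):
        return init_fn(torch.empty(*size, device=device))

    return initialize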