def load_orca_checkpoint(self, path, version, prefix=None): """ Load existing checkpoint :param path: Path to the existing checkpoint. :param version: checkpoint version, which is the suffix of model.* file, i.e., for modle.4 file, the version is 4. :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc' :return: """ from bigdl.nn.layer import Model, Container from bigdl.optim.optimizer import OptimMethod import os try: self.model = Model.load( os.path.join(path, "model.{}".format(version))) assert isinstance(self.model, Container), \ "The loaded model should be a Container, please check your checkpoint type." self.optimizer = OptimMethod.load( os.path.join(path, "{}.{}".format(prefix, version))) except Exception: raise ValueError( "Cannot load BigDL checkpoint, please check your checkpoint path " "and checkpoint type.") self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is not None: self.nn_estimator.setOptimMethod(self.optimizer) self.nn_model = NNModel( self.model, feature_preprocessing=self.feature_preprocessing)
def __init__(self, *, model, loss, optimizer=None, metrics=None, feature_preprocessing=None, label_preprocessing=None, model_dir=None): self.loss = loss self.optimizer = optimizer self.metrics = Metrics.convert_metrics_list(metrics) self.feature_preprocessing = feature_preprocessing self.label_preprocessing = label_preprocessing self.model_dir = model_dir self.model = model self.nn_model = NNModel( self.model, feature_preprocessing=self.feature_preprocessing) self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is None: from bigdl.optim.optimizer import SGD self.optimizer = SGD() self.nn_estimator.setOptimMethod(self.optimizer) self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.log_dir = None self.app_name = None self.is_nnframe_fit = False
def load(self, checkpoint, optimizer=None, loss=None, feature_preprocessing=None, label_preprocessing=None, model_dir=None, is_checkpoint=False): if loss is not None: self.loss = loss if optimizer is not None: self.optimizer = optimizer if feature_preprocessing is not None: self.feature_preprocessing = feature_preprocessing if label_preprocessing is not None: self.label_preprocessing = label_preprocessing if model_dir is not None: self.model_dir = model_dir if is_checkpoint: self.load_latest_orca_checkpoint(checkpoint) else: from zoo.pipeline.api.net import Net self.model = Net.load_bigdl(checkpoint + ".bigdl", checkpoint + ".bin") self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is None: from bigdl.optim.optimizer import SGD self.optimizer = SGD() self.nn_estimator.setOptimMethod(self.optimizer) self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.nn_model = NNModel(self.model, feature_preprocessing=self.feature_preprocessing) return self
def load_orca_checkpoint(self, path, version=None, prefix=None): """ Load existing checkpoint. To load a specific checkpoint, please provide both `version` and `perfix`. If `version` is None, then the latest checkpoint under the specified directory will be loaded. :param path: Path to the existing checkpoint (or directory containing Orca checkpoint files). :param version: checkpoint version, which is the suffix of model.* file, i.e., for modle.4 file, the version is 4. If it is None, then load the latest checkpoint. :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc' :return: """ from bigdl.nn.layer import Model, Container from bigdl.optim.optimizer import OptimMethod from zoo.orca.learn.utils import find_latest_checkpoint import os if version is None: path, prefix, version = find_latest_checkpoint(path, model_type="bigdl") if path is None: raise ValueError( "Cannot find BigDL checkpoint, please check your checkpoint" " path.") else: assert prefix is not None, "You should provide optimMethod prefix, " \ "for example 'optimMethod-TorchModelf53bddcc'" try: self.model = Model.load( os.path.join(path, "model.{}".format(version))) assert isinstance(self.model, Container), \ "The loaded model should be a Container, please check your checkpoint type." self.optimizer = OptimMethod.load( os.path.join(path, "{}.{}".format(prefix, version))) except Exception: raise ValueError( "Cannot load BigDL checkpoint, please check your checkpoint path " "and checkpoint type.") self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is not None: self.nn_estimator.setOptimMethod(self.optimizer) self.nn_model = NNModel( self.model, feature_preprocessing=self.feature_preprocessing)
def load_orca_checkpoint(self, path, version, prefix=None): from bigdl.nn.layer import Model, Container from bigdl.optim.optimizer import OptimMethod import os try: self.model = Model.load(os.path.join(path, "model.{}".format(version))) assert isinstance(self.model, Container), \ "The loaded model should be a Container, please check your checkpoint type." self.optimizer = OptimMethod.load(os.path.join(path, "{}.{}".format(prefix, version))) except Exception: raise ValueError("Cannot load BigDL checkpoint, please check your checkpoint path " "and checkpoint type.") self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is not None: self.nn_estimator.setOptimMethod(self.optimizer) self.nn_model = NNModel(self.model, feature_preprocessing=self.feature_preprocessing)
def load(self, checkpoint, optimizer=None, loss=None, feature_preprocessing=None, label_preprocessing=None, model_dir=None, is_checkpoint=False): """ Load existing BigDL model or checkpoint :param checkpoint: Path to the existing model or checkpoint. :param optimizer: BigDL optimizer. :param loss: BigDL criterion. :param feature_preprocessing: Used when data in `fit` and `predict` is a Spark DataFrame. The param converts the data in feature column to a Tensor or to a Sample directly. It expects a List of Int as the size of the converted Tensor, or a Preprocessing[F, Tensor[T]] If a List of Int is set as feature_preprocessing, it can only handle the case that feature column contains the following data types: Float, Double, Int, Array[Float], Array[Double], Array[Int] and MLlib Vector. The feature data are converted to Tensors with the specified sizes before sending to the model. Internally, a SeqToTensor is generated according to the size, and used as the feature_preprocessing. Alternatively, user can set feature_preprocessing as Preprocessing[F, Tensor[T]] that transforms the feature data to a Tensor[T]. Some pre-defined Preprocessing are provided in package zoo.feature. Multiple Preprocessing can be combined as a ChainedPreprocessing. The feature_preprocessing will also be copied to the generated NNModel and applied to feature column during transform. :param label_preprocessing: Used when data in `fit` and `predict` is a Spark DataFrame. similar to feature_preprocessing, but applies to Label data. :param model_dir: The path to save model. During the training, if checkpoint_trigger is defined and triggered, the model will be saved to model_dir. :param is_checkpoint: Whether the path is a checkpoint or a saved BigDL model. Default: False. :return: The loaded estimator object. """ if loss is not None: self.loss = loss if optimizer is not None: self.optimizer = optimizer if feature_preprocessing is not None: self.feature_preprocessing = feature_preprocessing if label_preprocessing is not None: self.label_preprocessing = label_preprocessing if model_dir is not None: self.model_dir = model_dir if is_checkpoint: self.load_latest_orca_checkpoint(checkpoint) else: from zoo.pipeline.api.net import Net self.model = Net.load_bigdl(checkpoint + ".bigdl", checkpoint + ".bin") self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is None: from bigdl.optim.optimizer import SGD self.optimizer = SGD() self.nn_estimator.setOptimMethod(self.optimizer) self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.nn_model = NNModel( self.model, feature_preprocessing=self.feature_preprocessing) return self
def load(self, checkpoint, optimizer=None, loss=None, feature_preprocessing=None, label_preprocessing=None, model_dir=None, is_checkpoint=False): if loss is not None: self.loss = loss if optimizer is not None: self.optimizer = optimizer if feature_preprocessing is not None: self.feature_preprocessing = feature_preprocessing if label_preprocessing is not None: self.label_preprocessing = label_preprocessing if model_dir is not None: self.model_dir = model_dir if is_checkpoint: from zoo.orca.learn.utils import find_latest_checkpoint from zoo.pipeline.api.net import Net from bigdl.nn.layer import Model, Container from bigdl.optim.optimizer import OptimMethod import os path, prefix, version = find_latest_checkpoint(checkpoint, model_type="bigdl") if path is None: raise ValueError( "Cannot find BigDL checkpoint, please check your checkpoint path." ) try: self.model = Model.load( os.path.join(path, "model.{}".format(version))) assert isinstance(self.model, Container), \ "The loaded model should be a Container, please check your checkpoint type." self.optimizer = OptimMethod.load( os.path.join(path, "{}.{}".format(prefix, version))) except Exception: raise ValueError( "Cannot load BigDL checkpoint, please check your checkpoint path " "and checkpoint type.") self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is not None: self.nn_estimator.setOptimMethod(self.optimizer) self.nn_model = NNModel( self.model, feature_preprocessing=self.feature_preprocessing) else: from zoo.pipeline.api.net import Net self.model = Net.load_bigdl(checkpoint + ".bigdl", checkpoint + ".bin") self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing, self.label_preprocessing) if self.optimizer is None: from bigdl.optim.optimizer import SGD self.optimizer = SGD() self.nn_estimator.setOptimMethod(self.optimizer) self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir) self.nn_model = NNModel( self.model, feature_preprocessing=self.feature_preprocessing) return self