def evaluate(self, data, batch_size=32, feature_cols=None, label_cols=None):
    """
    Evaluate the model on ``data`` with the metrics stored in ``self.metrics``.

    :param data: evaluation data. It can be a SparkXShards, a Spark DataFrame, a
           DataLoader or a callable; a DataLoader or callable is handed to
           ``FeatureSet.pytorch_dataloader`` unchanged.
    :param batch_size: batch size passed to ``self.estimator.evaluate``. Used for
           SparkXShards and Spark DataFrame input; the DataLoader/callable path does
           not receive it. Default: 32.
    :param feature_cols: feature column name(s), forwarded to ``row_to_sample``. Only
           used when data is a Spark DataFrame. Default: None.
    :param label_cols: label column name(s), forwarded to ``row_to_sample``. Only used
           when data is a Spark DataFrame. Default: None.
    :return: the evaluation result converted by ``bigdl_metric_results_to_dict``.
    :raises AssertionError: if ``data`` or ``self.metrics`` is None.
    :raises ValueError: if ``data`` is none of the supported types.
    """
    assert data is not None, "validation data shouldn't be None"
    assert self.metrics is not None, "metrics shouldn't be None, please specify the metrics" \
                                     " argument when creating this estimator."

    if isinstance(data, SparkXShards):
        # Imported lazily: only this branch needs it, so argument validation and the
        # other input types do not depend on the import.
        from zoo.orca.data.utils import xshard_to_sample
        val_feature_set = FeatureSet.sample_rdd(data.rdd.flatMap(xshard_to_sample))
        result = self.estimator.evaluate(val_feature_set, self.metrics, batch_size)
    elif isinstance(data, DataFrame):
        # Capture the schema once; the lambda below closes over it for every row.
        schema = data.schema
        val_feature_set = FeatureSet.sample_rdd(
            data.rdd.map(lambda row: row_to_sample(row, schema, feature_cols, label_cols)))
        result = self.estimator.evaluate(val_feature_set, self.metrics, batch_size)
    elif isinstance(data, DataLoader) or callable(data):
        val_feature_set = FeatureSet.pytorch_dataloader(data)
        result = self.estimator.evaluate_minibatch(val_feature_set, self.metrics)
    else:
        # The message lists every accepted input, including Spark DataFrame.
        raise ValueError(
            "Data should be a SparkXShards, a Spark DataFrame, a DataLoader or a callable "
            "data_creator, but get " + data.__class__.__name__)
    return bigdl_metric_results_to_dict(result)
def evaluate(self, data, batch_size=32, feature_cols=None, label_cols=None):
    """
    Run evaluation on the given data and return the metric results.

    :param data: validation data. Only XShards input is evaluated; each partition is a
           dictionary of {'x': feature, 'y': label}, where feature(label) is a numpy
           array or a list of numpy arrays. A Spark DataFrame raises
           NotImplementedError.
    :param batch_size: Batch size used for validation. Default: 32.
    :param feature_cols: (Not supported yet) Feature column name(s) of data. Only used
           when data is a Spark DataFrame. Default: None.
    :param label_cols: (Not supported yet) Label column name(s) of data. Only used when
           data is a Spark DataFrame. Default: None.
    :return: the evaluation result converted by bigdl_metric_results_to_dict.
    """
    assert data is not None, "validation data shouldn't be None"
    assert self.metrics is not None, "metrics shouldn't be None, please specify the metrics" \
                                     " argument when creating this estimator."

    # Spark DataFrame input is recognised but has no implementation here.
    if isinstance(data, DataFrame):
        raise NotImplementedError

    # Anything that is not XShards at this point is an unsupported input type.
    if not isinstance(data, SparkXShards):
        raise ValueError(
            "Data should be XShards or Spark DataFrame, but get " + data.__class__.__name__)

    from zoo.orca.data.utils import xshard_to_sample
    eval_set = FeatureSet.sample_rdd(data.rdd.flatMap(xshard_to_sample))
    metric_results = self.estimator.evaluate(eval_set, self.metrics, batch_size)
    return bigdl_metric_results_to_dict(metric_results)
def evaluate(self, data, batch_size=32, feature_cols="features", label_cols="label"):
    """
    Evaluate model.

    :param data: validation data. It can be XShards or Spark DataFrame. If data is
           XShards, each partition is a dictionary of {'x': feature, 'y': label}, where
           feature(label) is a numpy array or a list of numpy arrays.
    :param batch_size: Batch size used for validation. Default: 32.
    :param feature_cols: Feature column name(s) of data. Only used when data is a Spark
           DataFrame; a list is combined through BigDLEstimator._combine_cols.
           Default: "features".
    :param label_cols: Label column name(s) of data. Only used when data is a Spark
           DataFrame; a list is combined through BigDLEstimator._combine_cols.
           Default: "label".
    :return: the evaluation result converted by bigdl_metric_results_to_dict.
    :raises AssertionError: if data or self.metrics is None.
    :raises ValueError: if data is neither a Spark DataFrame nor XShards.
    """
    assert data is not None, "validation data shouldn't be None"
    assert self.metrics is not None, "metrics shouldn't be None, please specify the metrics" \
                                     " argument when creating this estimator."

    if isinstance(data, DataFrame):
        if isinstance(feature_cols, list):
            # Pass the list itself (not a nested list), the same way label_cols is
            # passed just below.
            data, _, feature_cols = \
                BigDLEstimator._combine_cols(data, feature_cols, col_name="features")

        if isinstance(label_cols, list):
            data, _, label_cols = \
                BigDLEstimator._combine_cols(data, label_cols, col_name="label")

        self.nn_estimator._setNNBatchSize(batch_size)._setNNFeaturesCol(feature_cols) \
            ._setNNLabelCol(label_cols)

        self.nn_estimator.setValidation(None, None, self.metrics, batch_size)
        # A validation summary is attached only when both log_dir and app_name are set.
        if self.log_dir is not None and self.app_name is not None:
            from bigdl.optim.optimizer import ValidationSummary
            val_summary = ValidationSummary(log_dir=self.log_dir, app_name=self.app_name)
            self.nn_estimator.setValidationSummary(val_summary)

        result = self.nn_estimator._eval(data)
    elif isinstance(data, SparkXShards):
        from zoo.orca.data.utils import xshard_to_sample
        val_feature_set = FeatureSet.sample_rdd(data.rdd.flatMap(xshard_to_sample))
        result = self.estimator.evaluate(val_feature_set, self.metrics, batch_size)
    else:
        raise ValueError(
            "Data should be XShards or Spark DataFrame, but get " + data.__class__.__name__)

    # Keep the expression on the same line as `return`; a bare `return` would yield None.
    return bigdl_metric_results_to_dict(result)
def evaluate(self, data, batch_size=32, feature_cols=None, label_cols=None,
             validation_metrics=None):
    """
    Evaluate model.

    :param data: evaluation data. It can be an XShards, Spark Dataframe, PyTorch
           DataLoader and PyTorch DataLoader creator function.
           If data is an XShards, each partition can be a Pandas DataFrame or a
           dictionary of {'x': feature, 'y': label}, where feature(label) is a numpy
           array or a list of numpy arrays.
    :param batch_size: Batch size used for evaluation. Used when data is a SparkXShards
           or a Spark DataFrame; the DataLoader/creator path does not receive it.
           Default: 32.
    :param feature_cols: Feature column name(s) of data. Only used when data is a Spark
           DataFrame or an XShards of Pandas DataFrame. Default: None.
    :param label_cols: Label column name(s) of data. Only used when data is a Spark
           DataFrame or an XShards of Pandas DataFrame. Default: None.
    :param validation_metrics: Orca validation metrics. NOTE: this argument is accepted
           but not read by this implementation; evaluation always uses ``self.metrics``
           (the metrics specified when creating this estimator). Default: None.
    :return: validation results, converted by ``bigdl_metric_results_to_dict``.
    :raises AssertionError: if ``data`` or ``self.metrics`` is None.
    :raises ValueError: if ``data`` is none of the supported types.
    """
    assert data is not None, "validation data shouldn't be None"
    assert self.metrics is not None, "metrics shouldn't be None, please specify the metrics" \
                                     " argument when creating this estimator."

    if isinstance(data, SparkXShards):
        # Imported lazily: only this branch needs it, so argument validation and the
        # other input types do not depend on the import.
        from zoo.orca.data.utils import xshard_to_sample
        # Shards holding Pandas DataFrames are converted first, using the column names.
        if data._get_class_name() == 'pandas.core.frame.DataFrame':
            data = process_xshards_of_pandas_dataframe(data, feature_cols, label_cols)
        val_feature_set = FeatureSet.sample_rdd(data.rdd.flatMap(xshard_to_sample))
        result = self.estimator.evaluate(val_feature_set, self.metrics, batch_size)
    elif isinstance(data, DataFrame):
        # Capture the schema once; the lambda below closes over it for every row.
        schema = data.schema
        val_feature_set = FeatureSet.sample_rdd(
            data.rdd.map(lambda row: row_to_sample(row, schema, feature_cols, label_cols)))
        result = self.estimator.evaluate(val_feature_set, self.metrics, batch_size)
    elif isinstance(data, DataLoader) or callable(data):
        val_feature_set = FeatureSet.pytorch_dataloader(data)
        result = self.estimator.evaluate_minibatch(val_feature_set, self.metrics)
    else:
        # The message lists every accepted input, including Spark DataFrame.
        raise ValueError(
            "Data should be a SparkXShards, a Spark DataFrame, a DataLoader or a callable "
            "data_creator, but get " + data.__class__.__name__)
    return bigdl_metric_results_to_dict(result)
def evaluate(self, data, batch_size=32, feature_cols=None, label_cols=None):
    """
    Evaluate the model on ``data`` with the metrics stored in ``self.metrics``.

    :param data: evaluation data. Only SparkXShards input is evaluated; a Spark
           DataFrame raises NotImplementedError.
    :param batch_size: batch size passed to ``self.estimator.evaluate``. Default: 32.
    :param feature_cols: not read by this implementation. Default: None.
    :param label_cols: not read by this implementation. Default: None.
    :return: the evaluation result converted by ``bigdl_metric_results_to_dict``.
    :raises AssertionError: if ``data`` or ``self.metrics`` is None.
    :raises NotImplementedError: if ``data`` is a Spark DataFrame.
    :raises ValueError: if ``data`` is neither a Spark DataFrame nor a SparkXShards.
    """
    assert data is not None, "validation data shouldn't be None"
    assert self.metrics is not None, "metrics shouldn't be None, please specify the metrics" \
                                     " argument when creating this estimator."

    if isinstance(data, DataFrame):
        raise NotImplementedError
    elif isinstance(data, SparkXShards):
        # Only the sample converter is needed here.
        from zoo.orca.data.utils import xshard_to_sample
        val_feature_set = FeatureSet.sample_rdd(data.rdd.flatMap(xshard_to_sample))
        result = self.estimator.evaluate(val_feature_set, self.metrics, batch_size)
    else:
        raise ValueError("Data should be XShards or Spark DataFrame, but get " +
                         data.__class__.__name__)

    return bigdl_metric_results_to_dict(result)