def test_calculate_loss():
    # For a 0-1 ranged scorer, make sure that the loss
    # has the expected positive value
    y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
    y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy],
    )['accuracy']

    loss = 1.0 - score
    assert pytest.approx(loss) == calculate_loss(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy],
    )['accuracy']

    # Test the dictionary case
    score_dict = calculate_score(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy, balanced_accuracy],
    )
    expected_score_dict = {
        'accuracy': 0.9,
        'balanced_accuracy': 0.9285714285714286,
    }
    loss_dict = calculate_loss(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy, balanced_accuracy],
    )
    for expected_metric, expected_score in expected_score_dict.items():
        assert pytest.approx(expected_score) == score_dict[expected_metric]
        assert pytest.approx(1 - expected_score) == loss_dict[expected_metric]

    # Lastly, make sure that metrics whose optimum is zero
    # are also working properly
    y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
    y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
    score = sklearn.metrics.mean_squared_error(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_REGRESSION,
        metrics=[mean_squared_error],
    )['mean_squared_error']

    loss = score
    assert pytest.approx(loss) == calculate_loss(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_REGRESSION,
        metrics=[mean_squared_error],
    )['mean_squared_error']
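# Illustrative sketch only (not autoPyTorch's implementation): the test above
# exercises the relation between a metric's score and its loss. For a metric
# whose optimum is 1.0 (accuracy), loss = 1 - score; for an error metric whose
# optimum is 0.0 (mean squared error), the score already is the loss. The helper
# name `to_loss` and the `metric_optimum` argument are hypothetical.
import numpy as np
import sklearn.metrics


def to_loss(score: float, metric_optimum: float) -> float:
    # Distance from the metric's best achievable value.
    return abs(metric_optimum - score)


_y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
_y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
_acc = sklearn.metrics.accuracy_score(_y_true, _y_pred)
assert np.isclose(to_loss(_acc, metric_optimum=1.0), 0.1)

_mse = sklearn.metrics.mean_squared_error([0.1, 0.2, 0.3], [0.11, 0.22, 0.33])
assert to_loss(_mse, metric_optimum=0.0) == _mse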
def score(self, X: np.ndarray, y: np.ndarray, batch_size: Optional[int] = None,
          metric_name: str = 'accuracy') -> float:
    """Scores the fitted estimator on (X, y).

    Args:
        X (np.ndarray): input to the pipeline, from which to guess targets
        batch_size (Optional[int]): batch_size controls whether the pipeline
            will be called on small chunks of the data. Useful when calling the
            predict method on the whole array X results in a MemoryError.
        y (np.ndarray): ground-truth labels
        metric_name (str, default = 'accuracy'): name of the metric to be calculated

    Returns:
        float: score based on the metric name
    """
    from autoPyTorch.pipeline.components.training.metrics.utils import (
        calculate_score,
        get_metrics,
    )
    metrics = get_metrics(self.dataset_properties, [metric_name])
    y_pred = self.predict(X, batch_size=batch_size)
    score = calculate_score(y, y_pred,
                            task_type=STRING_TO_TASK_TYPES[str(self.dataset_properties['task_type'])],
                            metrics=metrics)[metric_name]
    return score
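# A minimal sketch of the flow the method above wraps, assuming predictions are
# already at hand (so no pipeline is needed) and that STRING_TO_TASK_TYPES lives
# in autoPyTorch.constants; the toy arrays and the 'output_type' entry are
# assumptions, not taken from the snippet.
import numpy as np
from autoPyTorch.constants import STRING_TO_TASK_TYPES
from autoPyTorch.pipeline.components.training.metrics.utils import (
    calculate_score,
    get_metrics,
)

dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'binary'}
y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])  # stand-in for self.predict(X, batch_size=...)

metrics = get_metrics(dataset_properties, ['accuracy'])
score = calculate_score(y_true, y_pred,
                        task_type=STRING_TO_TASK_TYPES[dataset_properties['task_type']],
                        metrics=metrics)['accuracy']
print(score)  # 0.75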
def compute_metrics(self, outputs_data: List[torch.Tensor],
                    targets_data: List[torch.Tensor]) -> Dict[str, float]:
    # TODO: change once Ravin provides the PR
    # Per-batch outputs and targets arrive as lists of tensors; stitch them back
    # together before scoring.
    outputs_data = torch.cat(outputs_data, dim=0)
    targets_data = torch.cat(targets_data, dim=0)
    return calculate_score(targets_data, outputs_data, self.task_type, self.metrics)
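# Small standalone illustration of the concatenation step above: per-batch
# outputs collected during an epoch are stitched back into a single tensor
# before scoring. The toy tensors are assumptions; note torch.cat expects a
# sequence of tensors, not numpy arrays.
import torch

batched_outputs = [torch.tensor([[0.9, 0.1], [0.2, 0.8]]),
                   torch.tensor([[0.7, 0.3]])]
stacked = torch.cat(batched_outputs, dim=0)
print(stacked.shape)  # torch.Size([3, 2])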
def test_classification_only_metric():
    y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
    y_pred = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0],
                       [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
    scorer = accuracy

    score = calculate_score(y_true, y_pred, TABULAR_CLASSIFICATION, [scorer])

    previous_score = scorer._optimum
    assert score['accuracy'] == pytest.approx(previous_score)
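# Why the assertion above holds: every probability row puts all its mass on the
# true class, so turning the probabilities into hard labels and scoring them
# gives a perfect accuracy of 1.0, which is exactly the scorer's _optimum. The
# argmax step is my restatement of how a classification scorer typically handles
# probability input, not taken from the snippet; sketch with plain sklearn:
import numpy as np
import sklearn.metrics

_y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
_y_prob = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0],
                    [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
_hard = np.argmax(_y_prob, axis=1)
assert sklearn.metrics.accuracy_score(_y_true, _hard) == 1.0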
def test_metrics(self):
    # test of all classification metrics
    dataset_properties = {'task_type': 'tabular_classification'}
    y_target = np.array([0, 1, 0, 1])
    y_pred = np.array([0, 0, 0, 1])
    metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True)
    score_dict = calculate_score(y_pred, y_target,
                                 STRING_TO_TASK_TYPES[dataset_properties['task_type']],
                                 metrics)

    self.assertIsInstance(score_dict, dict)
    for name, score in score_dict.items():
        self.assertIsInstance(name, str)
        self.assertIsInstance(score, float)

    # test of all regression metrics
    dataset_properties = {'task_type': 'tabular_regression'}
    y_target = np.array([0.1, 0.6, 0.7, 0.4])
    y_pred = np.array([0.6, 0.7, 0.4, 1])
    metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True)
    score_dict = calculate_score(y_pred, y_target,
                                 STRING_TO_TASK_TYPES[dataset_properties['task_type']],
                                 metrics)

    self.assertIsInstance(score_dict, dict)
    for name, score in score_dict.items():
        self.assertIsInstance(name, str)
        self.assertIsInstance(score, float)
def test_classification_metrics():
    # test of all classification metrics
    dataset_properties = {
        'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION],
        'output_type': OUTPUT_TYPES_TO_STRING[BINARY]
    }
    y_target = np.array([0, 1, 0, 1])
    y_pred = np.array([0, 0, 0, 1])
    metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True)
    score_dict = calculate_score(
        y_pred, y_target,
        STRING_TO_TASK_TYPES[dataset_properties['task_type']],
        metrics)

    assert isinstance(score_dict, dict)
    for name, score in score_dict.items():
        assert isinstance(name, str)
        assert isinstance(score, float)
def test_regression_metrics():
    # test of all regression metrics
    dataset_properties = {
        'task_type': TASK_TYPES_TO_STRING[TABULAR_REGRESSION],
        'output_type': OUTPUT_TYPES_TO_STRING[CONTINUOUS]
    }
    y_target = np.array([0.1, 0.6, 0.7, 0.4])
    y_pred = np.array([0.6, 0.7, 0.4, 1])
    metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True)
    score_dict = calculate_score(
        y_pred, y_target,
        STRING_TO_TASK_TYPES[dataset_properties['task_type']],
        metrics)

    assert isinstance(score_dict, dict)
    for name, score in score_dict.items():
        assert isinstance(name, str)
        assert isinstance(score, float)
def score(self, X: np.ndarray, y: np.ndarray, batch_size: Optional[int] = None) -> float:
    """Scores the fitted estimator on (X, y) using R^2.

    Args:
        X (np.ndarray): input to the pipeline, from which to guess targets
        y (np.ndarray): ground-truth targets
        batch_size (Optional[int]): batch_size controls whether the pipeline
            will be called on small chunks of the data. Useful when calling the
            predict method on the whole array X results in a MemoryError.

    Returns:
        float: coefficient of determination R^2 of the prediction
    """
    from autoPyTorch.pipeline.components.training.metrics.utils import (
        calculate_score,
        get_metrics,
    )
    metrics = get_metrics(self.dataset_properties, ['r2'])
    y_pred = self.predict(X, batch_size=batch_size)
    r2 = calculate_score(y, y_pred,
                         task_type=STRING_TO_TASK_TYPES[self.dataset_properties['task_type']],
                         metrics=metrics)['r2']
    return r2
def _fit(
    self,
    predictions: List[np.ndarray],
    labels: np.ndarray,
) -> None:
    """Fast version of Rich Caruana's ensemble selection method."""
    self.num_input_models_ = len(predictions)

    ensemble = []  # type: List[np.ndarray]
    trajectory = []
    order = []
    ensemble_size = self.ensemble_size

    weighted_ensemble_prediction = np.zeros(
        predictions[0].shape,
        dtype=np.float64,
    )
    fant_ensemble_prediction = np.zeros(
        weighted_ensemble_prediction.shape,
        dtype=np.float64,
    )
    for i in range(ensemble_size):
        scores = np.zeros(
            len(predictions),
            dtype=np.float64,
        )
        s = len(ensemble)
        # Rebuild the running average of the current ensemble and pre-scale it
        # by s / (s + 1), so that adding pred / (s + 1) below yields the mean of
        # the candidate (s + 1)-member ensemble.
        weighted_ensemble_prediction.fill(0.0)
        if s > 0:
            for pred in ensemble:
                np.add(
                    weighted_ensemble_prediction,
                    pred,
                    out=weighted_ensemble_prediction,
                )
            np.multiply(
                weighted_ensemble_prediction,
                1 / s,
                out=weighted_ensemble_prediction,
            )
            np.multiply(
                weighted_ensemble_prediction,
                (s / float(s + 1)),
                out=weighted_ensemble_prediction,
            )
        for j, pred in enumerate(predictions):
            # Memory-efficient averaging!
            fant_ensemble_prediction.fill(0.0)
            np.add(
                fant_ensemble_prediction,
                weighted_ensemble_prediction,
                out=fant_ensemble_prediction,
            )
            np.add(
                fant_ensemble_prediction,
                (1. / float(s + 1)) * pred,
                out=fant_ensemble_prediction,
            )
            # calculate_score returns a dict keyed by metric name; with a single
            # metric we read out that one entry and convert it to a regret
            # (distance from the metric's optimum), which is then minimised.
            score = calculate_score(
                metrics=[self.metric],
                target=labels,
                prediction=fant_ensemble_prediction,
                task_type=self.task_type,
            )
            scores[j] = self.metric._optimum - score[self.metric.name]

        all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
        best = self.random_state.choice(all_best)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

        # Handle special case: with a single model there is nothing left to select
        if len(predictions) == 1:
            break

    self.indices_ = order
    self.trajectory_ = trajectory
    self.train_score_ = trajectory[-1]
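# The rescaling in _fit relies on the running-mean identity: if w_s is the mean
# of s ensemble members, the mean after appending pred is
# w_s * s / (s + 1) + pred / (s + 1). A standalone check with toy arrays (all
# names here are local to this sketch):
import numpy as np

rng = np.random.default_rng(0)
members = [rng.random(5) for _ in range(3)]
pred = rng.random(5)

s = len(members)
w_s = np.mean(members, axis=0)
incremental = w_s * (s / (s + 1)) + pred / (s + 1)
direct = np.mean(members + [pred], axis=0)
assert np.allclose(incremental, direct)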