import numpy as np
import pytest
import tensorflow as tf

from probflow.distributions import Bernoulli
from probflow.utils import metrics


def is_close(a, b, tol=1e-3):
    """Tolerance-based float comparison (helper assumed to be defined in
    the test module)."""
    return np.abs(a - b) < tol


def test_get_metric_fn():
    """Tests probflow.utils.metrics.get_metric_fn"""

    metric_fn = metrics.get_metric_fn('f1')

    # Predictive dist
    probs = tf.constant([0, 1, 1, 1, 1, 0], dtype=tf.float32)
    pred_dist = Bernoulli(probs=probs)
    y_true = np.array([1, 0, 1, 1, 0, 0]).astype('float32')

    # Compare computed metric to F1 score computed by hand
    ppv = 2 / 4  # precision: 2 true positives out of 4 predicted positives
    tpr = 2 / 3  # recall: 2 true positives out of 3 actual positives
    f1 = 2 * (ppv * tpr) / (ppv + tpr)
    assert is_close(metric_fn(y_true, pred_dist.mean()), f1)

    # Should be able to pass a callable
    metric_fn = metrics.get_metric_fn(lambda x, y: 3)
    assert metric_fn(y_true, pred_dist.mean()) == 3

    # Should raise a TypeError if passed anything else
    with pytest.raises(TypeError):
        metrics.get_metric_fn(3)
    with pytest.raises(TypeError):
        metrics.get_metric_fn([1, 2, 3])
    with pytest.raises(TypeError):
        metrics.get_metric_fn({'apples': 1, 'oranges': 2})

    # And a ValueError if passed an invalid metric string
    with pytest.raises(ValueError):
        metrics.get_metric_fn('asdf')
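# For reference, the behavior pinned down by the test above implies a lookup
# function along these lines.  This is a minimal sketch, NOT probflow's actual
# implementation: the METRICS dict and the _f1 helper are illustrative
# assumptions (the real module maps many more metric names).

def _f1(y_true, y_pred):
    """Illustrative F-measure from thresholded 0/1 predictions."""
    y_pred = np.round(np.asarray(y_pred))
    tp = np.sum((y_true == 1) & (y_pred == 1))
    ppv = tp / max(np.sum(y_pred == 1), 1)  # precision
    tpr = tp / max(np.sum(y_true == 1), 1)  # recall
    return 2 * ppv * tpr / max(ppv + tpr, 1e-12)


METRICS = {'f1': _f1}


def get_metric_fn(metric):
    """Return a metric function given a string name or a callable."""
    if callable(metric):
        return metric  # callables pass through unchanged
    if isinstance(metric, str):
        if metric not in METRICS:
            raise ValueError(f"invalid metric string '{metric}'")
        return METRICS[metric]
    raise TypeError('metric must be a str or a callable')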
def __init__(self, metric, x, y=None, verbose=True):

    # Store metric
    self.metric_fn = get_metric_fn(metric)

    # Store validation data
    self.data = make_generator(x, y)

    # Store metrics and epochs
    self.current_metric = np.nan
    self.current_epoch = 0
    self.metrics = []
    self.epochs = []
    self.verbose = verbose
def __init__(self, metric, x, y=None, verbose=False):

    # Store metric
    self.metric_fn = get_metric_fn(metric)
    if isinstance(metric, str):
        self.metric_name = metric
    else:
        self.metric_name = self.metric_fn.__name__

    # Store validation data
    self.data = make_generator(x, y)

    # Store metrics and epochs
    self.current_metric = np.nan
    self.current_epoch = 0
    self.metrics = []
    self.epochs = []
    self.verbose = verbose
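# Hypothetical usage of the callback above, assuming it is probflow's
# MonitorMetric (the class name is not shown in this excerpt).  `model`,
# `x_train`, and `y_train` are placeholders for a probflow model and training
# data; they are not defined here.
import numpy as np
from probflow.callbacks import MonitorMetric

x_val = np.random.randn(100, 3).astype('float32')
y_val = np.random.randn(100, 1).astype('float32')

# Compute MAE on the held-out data at the end of each epoch
monitor = MonitorMetric('mae', x_val, y_val, verbose=True)
model.fit(x_train, y_train, callbacks=[monitor])

# After training, the recorded history lives on the callback
print(monitor.epochs)   # epochs at which the metric was evaluated
print(monitor.metrics)  # metric value after each of those epochs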
def metric(self, metric, x, y=None, batch_size=None):
    """Compute a metric of model performance

    Note that this method does not work with generative models.

    Parameters
    ----------
    metric : str or callable
        Metric to evaluate.  Available metrics:

        * 'lp': log likelihood sum
        * 'log_prob': log likelihood sum
        * 'accuracy': accuracy
        * 'acc': accuracy
        * 'mean_squared_error': mean squared error
        * 'mse': mean squared error
        * 'sum_squared_error': sum squared error
        * 'sse': sum squared error
        * 'mean_absolute_error': mean absolute error
        * 'mae': mean absolute error
        * 'r_squared': coefficient of determination
        * 'r2': coefficient of determination
        * 'recall': true positive rate
        * 'sensitivity': true positive rate
        * 'true_positive_rate': true positive rate
        * 'tpr': true positive rate
        * 'specificity': true negative rate
        * 'selectivity': true negative rate
        * 'true_negative_rate': true negative rate
        * 'tnr': true negative rate
        * 'precision': precision
        * 'f1_score': F-measure
        * 'f1': F-measure
        * callable: a function which takes (y_true, y_pred)
    x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
        Independent variable values of the dataset to evaluate (aka the
        "features").  Or a |DataGenerator| to generate both x and y.
    y : |ndarray| or |DataFrame| or |Series| or Tensor
        Dependent variable values of the dataset to evaluate (aka the
        "target").
    batch_size : None or int
        Compute using batches of this many datapoints.  Default is `None`
        (i.e., do not use batching).

    Returns
    -------
    float
        The value of the requested metric, computed between the true
        dependent-variable values and the model's predictive means.
    """

    # Get true values and predictions
    y_true = []
    y_pred = []
    for x_data, y_data in make_generator(
        x, y, test=True, batch_size=batch_size
    ):
        y_true += [y_data]
        y_pred += [self(x_data).mean()]
    y_true = np.concatenate(to_numpy(y_true), axis=0)
    y_pred = np.concatenate(to_numpy(y_pred), axis=0)

    # Compute metric between true values and predictions
    metric_fn = get_metric_fn(metric)
    return metric_fn(y_true, y_pred)
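# Example usage (a sketch): `model` is assumed to be an already-fit probflow
# model, and `x_test`/`y_test` held-out data; none of these are defined in
# this excerpt.
mse = model.metric('mse', x_test, y_test)

# Evaluate in batches of 1024 datapoints to limit memory use
mae = model.metric('mae', x_test, y_test, batch_size=1024)

# Any callable taking (y_true, y_pred) also works as a metric
max_abs_err = model.metric(
    lambda y_true, y_pred: np.max(np.abs(y_true - y_pred)), x_test, y_test
)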