def Fbeta(
    beta: float,
    average: bool = True,
    precision: Optional[Precision] = None,
    recall: Optional[Recall] = None,
    output_transform: Optional[Callable] = None,
    device: Optional[Union[str, torch.device]] = None,
) -> MetricsLambda:
    """Calculates F-beta score.

    The score is composed as
    ``(1 + beta^2) * precision * recall / (beta^2 * precision + recall)``.

    Args:
        beta (float): positive real factor of the weighted harmonic mean. Values above 1
            pull the score towards recall, values below 1 towards precision, and ``beta=1``
            treats both symmetrically.
        average (bool, optional): if True, F-beta score is computed as the unweighted average
            (across all classes in multiclass case), otherwise, returns a tensor with F-beta
            score for each class in multiclass case.
        precision (Precision, optional): precision object metric with `average=False` to
            compute F-beta score
        recall (Recall, optional): recall object metric with `average=False` to compute
            F-beta score
        output_transform (callable, optional): a callable that is used to transform the
            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
            form expected by the metric. It is used only if precision or recall are not
            provided.
        device (str or torch.device, optional): optional device specification for internal
            storage. It is only forwarded to the precision/recall metrics created here.

    Returns:
        MetricsLambda, F-beta metric

    Raises:
        ValueError: if ``beta`` is not strictly positive, if ``output_transform`` is given
            together with ``precision`` or ``recall``, or if a provided ``precision`` /
            ``recall`` metric was not configured with ``average=False``.
    """
    # Written as ``not (beta > 0)`` so that NaN, which fails every comparison, is rejected too.
    if not (beta > 0):
        raise ValueError(f"Beta should be a positive number, but given {beta}")

    # A user-supplied metric already carries its own output transform.
    if output_transform is not None:
        if precision is not None:
            raise ValueError("If precision argument is provided, output_transform should be None")
        if recall is not None:
            raise ValueError("If recall argument is provided, output_transform should be None")

    # Identity transform is the fallback for the metrics built below.
    transform: Callable = (lambda x: x) if output_transform is None else output_transform

    if precision is None:
        precision = Precision(output_transform=transform, average=False, device=device)
    elif precision._average:
        raise ValueError("Input precision metric should have average=False")

    if recall is None:
        recall = Recall(output_transform=transform, average=False, device=device)
    elif recall._average:
        raise ValueError("Input recall metric should have average=False")

    beta_sq = beta ** 2
    # The tiny constant keeps the denominator non-zero when precision and recall are both 0.
    fbeta = (1.0 + beta_sq) * precision * recall / (beta_sq * precision + recall + 1e-15)

    if average:
        fbeta = fbeta.mean().item()

    return fbeta
def ClassificationReport(
    beta: int = 1,
    output_dict: bool = False,
    output_transform: Callable = lambda x: x,
    device: Union[str, torch.device] = torch.device("cpu"),
    is_multilabel: bool = False,
    labels: Optional[List[str]] = None,
) -> MetricsLambda:
    r"""Build a text report showing the main classification metrics.

    The report resembles in functionality to
    `scikit-learn classification_report
    <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html#sklearn.metrics.classification_report>`_
    The underlying implementation doesn't use the sklearn function.

    For every class the report holds ``precision``, ``recall`` and ``f{beta}-score``;
    an extra ``"macro avg"`` entry holds the mean of each of them over the classes.

    Args:
        beta: beta factor forwarded to the F-beta metric; it also names the
            ``f{beta}-score`` key of the report.
        output_dict: If True, return output as dict, otherwise return a str
            (the same dict serialized with ``json.dumps``).
        output_transform: a callable that is used to transform the
            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
            form expected by the metric. This can be useful if, for example, you have a
            multi-output model and you want to compute the metric with respect to one of
            the outputs.
        is_multilabel: If True, the tensors are assumed to be multilabel.
        device: optional device specification for internal storage. It is forwarded to the
            underlying precision and recall metrics.
        labels: Optional list of label names; ``labels[i]`` is used as the report key of
            class ``i``. When omitted or empty, the class index as a string is used.

    Examples:

        .. include:: defaults.rst
            :start-after: :orphan:

        Multiclass case

        .. testcode:: 1

            metric = ClassificationReport(output_dict=True)
            metric.attach(default_evaluator, "cr")
            y_true = torch.Tensor([2, 0, 2, 1, 0, 1]).long()
            y_pred = torch.Tensor([
                [0.0266, 0.1719, 0.3055],
                [0.6886, 0.3978, 0.8176],
                [0.9230, 0.0197, 0.8395],
                [0.1785, 0.2670, 0.6084],
                [0.8448, 0.7177, 0.7288],
                [0.7748, 0.9542, 0.8573],
            ])
            state = default_evaluator.run([[y_pred, y_true]])
            print(state.metrics["cr"].keys())
            print(state.metrics["cr"]["0"])
            print(state.metrics["cr"]["1"])
            print(state.metrics["cr"]["2"])
            print(state.metrics["cr"]["macro avg"])

        .. testoutput:: 1

            dict_keys(['0', '1', '2', 'macro avg'])
            {'precision': 0.5, 'recall': 0.5, 'f1-score': 0.4999...}
            {'precision': 1.0, 'recall': 0.5, 'f1-score': 0.6666...}
            {'precision': 0.3333..., 'recall': 0.5, 'f1-score': 0.3999...}
            {'precision': 0.6111..., 'recall': 0.5, 'f1-score': 0.5222...}

        Multilabel case, the shapes must be (batch_size, num_categories, ...)

        .. testcode:: 2

            metric = ClassificationReport(output_dict=True, is_multilabel=True)
            metric.attach(default_evaluator, "cr")
            y_true = torch.Tensor([
                [0, 0, 1],
                [0, 0, 0],
                [0, 0, 0],
                [1, 0, 0],
                [0, 1, 1],
            ]).unsqueeze(0)
            y_pred = torch.Tensor([
                [1, 1, 0],
                [1, 0, 1],
                [1, 0, 0],
                [1, 0, 1],
                [1, 1, 0],
            ]).unsqueeze(0)
            state = default_evaluator.run([[y_pred, y_true]])
            print(state.metrics["cr"].keys())
            print(state.metrics["cr"]["0"])
            print(state.metrics["cr"]["1"])
            print(state.metrics["cr"]["2"])
            print(state.metrics["cr"]["macro avg"])

        .. testoutput:: 2

            dict_keys(['0', '1', '2', 'macro avg'])
            {'precision': 0.2, 'recall': 1.0, 'f1-score': 0.3333...}
            {'precision': 0.5, 'recall': 1.0, 'f1-score': 0.6666...}
            {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0}
            {'precision': 0.2333..., 'recall': 0.6666..., 'f1-score': 0.3333...}

    """
    # Per-class building blocks; F-beta reuses the very same precision/recall objects.
    per_class_precision = Precision(
        average=False,
        is_multilabel=is_multilabel,
        output_transform=output_transform,
        device=device,
    )
    per_class_recall = Recall(
        average=False,
        is_multilabel=is_multilabel,
        output_transform=output_transform,
        device=device,
    )
    per_class_fbeta = Fbeta(beta, average=False, precision=per_class_precision, recall=per_class_recall)

    # Macro averages: plain means over the per-class values.
    macro_precision = per_class_precision.mean()
    macro_recall = per_class_recall.mean()
    macro_fbeta = per_class_fbeta.mean()

    # The key depends only on ``beta``, so it is formatted once.
    score_key = f"f{beta}-score"

    # NOTE(review): parameters are annotated as Metric, but the body treats them as
    # tensor-like values (``.shape``, indexing, ``.item()``) -- presumably MetricsLambda
    # hands over the computed results of the metrics listed below; confirm.
    def _assemble_report(
        rec: Metric,
        prec: Metric,
        fb: Metric,
        rec_avg: Metric,
        prec_avg: Metric,
        fb_avg: Metric,
    ) -> Union[Collection[str], Dict]:
        if prec.shape != rec.shape:
            raise ValueError(
                "Internal error: Precision and Recall have mismatched shapes: "
                f"{prec.shape} vs {rec.shape}. Please, open an issue "
                "with a reference on this error. Thank you!"
            )

        report = {}
        for class_idx, class_precision in enumerate(prec):
            row = {
                "precision": class_precision.item(),
                "recall": rec[class_idx].item(),
                score_key: fb[class_idx].item(),
            }
            # User-provided name when available, otherwise the class index as text.
            report[labels[class_idx] if labels else str(class_idx)] = row

        report["macro avg"] = {
            "precision": prec_avg.item(),
            "recall": rec_avg.item(),
            score_key: fb_avg.item(),
        }

        if output_dict:
            return report
        return json.dumps(report)

    # Argument order here must match the positional parameters of ``_assemble_report``.
    return MetricsLambda(
        _assemble_report,
        per_class_recall,
        per_class_precision,
        per_class_fbeta,
        macro_recall,
        macro_precision,
        macro_fbeta,
    )
def Fbeta(
    beta: float,
    average: bool = True,
    precision: Optional[Precision] = None,
    recall: Optional[Recall] = None,
    output_transform: Optional[Callable] = None,
    device: Union[str, torch.device] = torch.device("cpu"),
) -> MetricsLambda:
    r"""Calculates F-beta score.

    .. math::
        F_\beta = \left( 1 + \beta^2 \right) * \frac{ \text{precision} * \text{recall} }
        { \left( \beta^2 * \text{precision} \right) + \text{recall} }

    where :math:`\beta` is a positive real factor.

    Args:
        beta (float): positive real factor of the weighted harmonic mean. Values above 1
            pull the score towards recall, values below 1 towards precision, and ``beta=1``
            treats both symmetrically.
        average (bool, optional): if True, F-beta score is computed as the unweighted average
            (across all classes in multiclass case), otherwise, returns a tensor with F-beta
            score for each class in multiclass case.
        precision (Precision, optional): precision object metric with `average=False` to
            compute F-beta score
        recall (Recall, optional): recall object metric with `average=False` to compute
            F-beta score
        output_transform (callable, optional): a callable that is used to transform the
            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
            form expected by the metric. It is used only if precision or recall are not
            provided.
        device (str or torch.device): specifies which device updates are accumulated on.
            Setting the metric's device to be the same as your ``update`` arguments ensures
            the ``update`` method is non-blocking. By default, CPU. It is only forwarded to
            the precision/recall metrics created here.

    Returns:
        MetricsLambda, F-beta metric

    Raises:
        ValueError: if ``beta`` is not strictly positive, if ``output_transform`` is given
            together with ``precision`` or ``recall``, or if a provided ``precision`` /
            ``recall`` metric was not configured with ``average=False``.
    """
    # Written as ``not (beta > 0)`` so that NaN, which fails every comparison, is rejected too.
    if not (beta > 0):
        raise ValueError(f"Beta should be a positive number, but given {beta}")

    # A user-supplied metric already carries its own output transform.
    if output_transform is not None:
        if precision is not None:
            raise ValueError("If precision argument is provided, output_transform should be None")
        if recall is not None:
            raise ValueError("If recall argument is provided, output_transform should be None")

    # Identity transform is the fallback for the metrics built below.
    transform: Callable = (lambda x: x) if output_transform is None else output_transform

    if precision is None:
        precision = Precision(output_transform=transform, average=False, device=device)
    elif precision._average:
        raise ValueError("Input precision metric should have average=False")

    if recall is None:
        recall = Recall(output_transform=transform, average=False, device=device)
    elif recall._average:
        raise ValueError("Input recall metric should have average=False")

    beta_sq = beta**2
    # The tiny constant keeps the denominator non-zero when precision and recall are both 0.
    fbeta = (1.0 + beta_sq) * precision * recall / (beta_sq * precision + recall + 1e-15)

    if average:
        fbeta = fbeta.mean().item()

    return fbeta
def Fbeta(
    beta: float,
    average: bool = True,
    precision: Optional[Precision] = None,
    recall: Optional[Recall] = None,
    output_transform: Optional[Callable] = None,
    device: Union[str, torch.device] = torch.device("cpu"),
) -> MetricsLambda:
    r"""Calculates F-beta score.

    .. math::
        F_\beta = \left( 1 + \beta^2 \right) * \frac{ \text{precision} * \text{recall} }
        { \left( \beta^2 * \text{precision} \right) + \text{recall} }

    where :math:`\beta` is a positive real factor.

    - ``update`` must receive output of the form ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y}``.
    - `y_pred` must be in the following shape (batch_size, num_categories, ...) or (batch_size, ...).
    - `y` must be in the following shape (batch_size, ...).

    Args:
        beta: positive real factor of the weighted harmonic mean. Values above 1 pull the
            score towards recall, values below 1 towards precision, and ``beta=1`` treats
            both symmetrically.
        average: if True, F-beta score is computed as the unweighted average (across all
            classes in multiclass case), otherwise, returns a tensor with F-beta score for
            each class in multiclass case.
        precision: precision object metric with `average=False` to compute F-beta score
        recall: recall object metric with `average=False` to compute F-beta score
        output_transform: a callable that is used to transform the
            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
            form expected by the metric. It is used only if precision or recall are not
            provided.
        device: specifies which device updates are accumulated on. Setting the metric's
            device to be the same as your ``update`` arguments ensures the ``update`` method
            is non-blocking. By default, CPU. It is only forwarded to the precision/recall
            metrics created here.

    Returns:
        MetricsLambda, F-beta metric

    Raises:
        ValueError: if ``beta`` is not strictly positive, if ``output_transform`` is given
            together with ``precision`` or ``recall``, or if a provided ``precision`` /
            ``recall`` metric was not configured with ``average=False``.

    Examples:

        Binary case

        .. testcode:: 1

            P = Precision(average=False)
            R = Recall(average=False)
            metric = Fbeta(beta=1.0, precision=P, recall=R)
            metric.attach(default_evaluator, "f-beta")
            y_true = torch.Tensor([1, 0, 1, 1, 0, 1])
            y_pred = torch.Tensor([1, 0, 1, 0, 1, 1])
            state = default_evaluator.run([[y_pred, y_true]])
            print(state.metrics["f-beta"])

        .. testoutput:: 1

            0.7499...

        Multiclass case

        .. testcode:: 2

            P = Precision(average=False)
            R = Recall(average=False)
            metric = Fbeta(beta=1.0, precision=P, recall=R)
            metric.attach(default_evaluator, "f-beta")
            y_true = torch.Tensor([2, 0, 2, 1, 0, 1]).long()
            y_pred = torch.Tensor([
                [0.0266, 0.1719, 0.3055],
                [0.6886, 0.3978, 0.8176],
                [0.9230, 0.0197, 0.8395],
                [0.1785, 0.2670, 0.6084],
                [0.8448, 0.7177, 0.7288],
                [0.7748, 0.9542, 0.8573],
            ])
            state = default_evaluator.run([[y_pred, y_true]])
            print(state.metrics["f-beta"])

        .. testoutput:: 2

            0.5222...

        F-beta can be computed for each class as done below:

        .. testcode:: 3

            P = Precision(average=False)
            R = Recall(average=False)
            metric = Fbeta(beta=1.0, average=False, precision=P, recall=R)
            metric.attach(default_evaluator, "f-beta")
            y_true = torch.Tensor([2, 0, 2, 1, 0, 1]).long()
            y_pred = torch.Tensor([
                [0.0266, 0.1719, 0.3055],
                [0.6886, 0.3978, 0.8176],
                [0.9230, 0.0197, 0.8395],
                [0.1785, 0.2670, 0.6084],
                [0.8448, 0.7177, 0.7288],
                [0.7748, 0.9542, 0.8573],
            ])
            state = default_evaluator.run([[y_pred, y_true]])
            print(state.metrics["f-beta"])

        .. testoutput:: 3

            tensor([0.5000, 0.6667, 0.4000], dtype=torch.float64)

        The elements of `y` and `y_pred` should have 0 or 1 values. Thresholding of
        predictions can be done as below:

        .. testcode:: 4

            def thresholded_output_transform(output):
                y_pred, y = output
                y_pred = torch.round(y_pred)
                return y_pred, y

            P = Precision(average=False, output_transform=thresholded_output_transform)
            R = Recall(average=False, output_transform=thresholded_output_transform)
            metric = Fbeta(beta=1.0, precision=P, recall=R)
            metric.attach(default_evaluator, "f-beta")
            y_true = torch.Tensor([1, 0, 1, 1, 0, 1])
            y_pred = torch.Tensor([0.6, 0.2, 0.9, 0.4, 0.7, 0.65])
            state = default_evaluator.run([[y_pred, y_true]])
            print(state.metrics["f-beta"])

        .. testoutput:: 4

            0.7499...

    """
    # Written as ``not (beta > 0)`` so that NaN, which fails every comparison, is rejected too.
    if not (beta > 0):
        raise ValueError(f"Beta should be a positive number, but given {beta}")

    # A user-supplied metric already carries its own output transform.
    if output_transform is not None:
        if precision is not None:
            raise ValueError("If precision argument is provided, output_transform should be None")
        if recall is not None:
            raise ValueError("If recall argument is provided, output_transform should be None")

    # Identity transform is the fallback for the metrics built below.
    transform: Callable = (lambda x: x) if output_transform is None else output_transform

    if precision is None:
        precision = Precision(output_transform=transform, average=False, device=device)
    elif precision._average:
        raise ValueError("Input precision metric should have average=False")

    if recall is None:
        recall = Recall(output_transform=transform, average=False, device=device)
    elif recall._average:
        raise ValueError("Input recall metric should have average=False")

    beta_sq = beta**2
    # The tiny constant keeps the denominator non-zero when precision and recall are both 0.
    fbeta = (1.0 + beta_sq) * precision * recall / (beta_sq * precision + recall + 1e-15)

    if average:
        fbeta = fbeta.mean().item()

    return fbeta
def ClassificationReport(
    beta: int = 1,
    output_dict: bool = False,
    output_transform: Callable = lambda x: x,
    device: Union[str, torch.device] = torch.device("cpu"),
    is_multilabel: bool = False,
    labels: Optional[List[str]] = None,
) -> MetricsLambda:
    r"""Build a text report showing the main classification metrics.

    The report resembles in functionality to
    `scikit-learn classification_report
    <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html#sklearn.metrics.classification_report>`_
    The underlying implementation doesn't use the sklearn function.

    For every class the report holds ``precision``, ``recall`` and ``f{beta}-score``;
    an extra ``"macro avg"`` entry holds the mean of each of them over the classes.

    Args:
        beta: beta factor forwarded to the F-beta metric; it also names the
            ``f{beta}-score`` key of the report.
        output_dict: If True, return output as dict, otherwise return a str
            (the same dict serialized with ``json.dumps``).
        output_transform: a callable that is used to transform the
            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
            form expected by the metric. This can be useful if, for example, you have a
            multi-output model and you want to compute the metric with respect to one of
            the outputs.
        is_multilabel: If True, the tensors are assumed to be multilabel.
        device: optional device specification for internal storage. It is forwarded to the
            underlying precision and recall metrics.
        labels: Optional list of label names; ``labels[i]`` is used as the report key of
            class ``i``. When omitted or empty, the class index as a string is used.

    .. code-block:: python

        def process_function(engine, batch):
            # ...
            return y_pred, y

        engine = Engine(process_function)
        metric = ClassificationReport()
        metric.attach(engine, "cr")
        engine.run...
        res = engine.state.metrics["cr"]
        # result should be like
        {
          "0": {
            "precision": 0.4891304347826087,
            "recall": 0.5056179775280899,
            "f1-score": 0.497237569060773
          },
          "1": {
            "precision": 0.5157232704402516,
            "recall": 0.4992389649923896,
            "f1-score": 0.507347254447022
          },
          "macro avg": {
            "precision": 0.5024268526114302,
            "recall": 0.5024284712602398,
            "f1-score": 0.5022924117538975
          }
        }
    """
    # Per-class building blocks; F-beta reuses the very same precision/recall objects.
    per_class_precision = Precision(
        average=False, is_multilabel=is_multilabel, output_transform=output_transform, device=device
    )
    per_class_recall = Recall(
        average=False, is_multilabel=is_multilabel, output_transform=output_transform, device=device
    )
    per_class_fbeta = Fbeta(beta, average=False, precision=per_class_precision, recall=per_class_recall)

    # Macro averages: plain means over the per-class values.
    macro_precision = per_class_precision.mean()
    macro_recall = per_class_recall.mean()
    macro_fbeta = per_class_fbeta.mean()

    # The key depends only on ``beta``, so it is formatted once.
    score_key = f"f{beta}-score"

    # NOTE(review): parameters are annotated as Metric, but the body treats them as
    # tensor-like values (``.shape``, indexing, ``.item()``) -- presumably MetricsLambda
    # hands over the computed results of the metrics listed below; confirm.
    def _assemble_report(
        rec: Metric,
        prec: Metric,
        fb: Metric,
        rec_avg: Metric,
        prec_avg: Metric,
        fb_avg: Metric,
    ) -> Union[Collection[str], Dict]:
        if prec.shape != rec.shape:
            raise ValueError(
                "Internal error: Precision and Recall have mismatched shapes: "
                f"{prec.shape} vs {rec.shape}. Please, open an issue "
                "with a reference on this error. Thank you!"
            )

        report = {}
        for class_idx, class_precision in enumerate(prec):
            row = {
                "precision": class_precision.item(),
                "recall": rec[class_idx].item(),
                score_key: fb[class_idx].item(),
            }
            # User-provided name when available, otherwise the class index as text.
            report[labels[class_idx] if labels else str(class_idx)] = row

        report["macro avg"] = {
            "precision": prec_avg.item(),
            "recall": rec_avg.item(),
            score_key: fb_avg.item(),
        }

        if output_dict:
            return report
        return json.dumps(report)

    # Argument order here must match the positional parameters of ``_assemble_report``.
    return MetricsLambda(
        _assemble_report,
        per_class_recall,
        per_class_precision,
        per_class_fbeta,
        macro_recall,
        macro_precision,
        macro_fbeta,
    )