def test_classification_per_class_accuracy_complete_prediction(self):
    annotation = [
        ClassificationAnnotation('identifier_1', 1),
        ClassificationAnnotation('identifier_2', 0)
    ]
    prediction = [
        ClassificationPrediction('identifier_1', [1.0, 2.0]),
        ClassificationPrediction('identifier_2', [2.0, 1.0])
    ]
    dataset = DummyDataset(label_map={0: '0', 1: '1'})
    dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 1}], dataset)
    dispatcher.update_metrics_on_batch(range(len(annotation)), annotation, prediction)

    for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
        assert evaluation_result.name == 'accuracy_per_class'
        assert len(evaluation_result.evaluated_value) == 2
        assert evaluation_result.evaluated_value[0] == pytest.approx(1.0)
        assert evaluation_result.evaluated_value[1] == pytest.approx(1.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.threshold is None

def test_accuracy_on_container_with_wrong_annotation_source_name_raise_config_error_exception(self):
    annotations = [ContainerAnnotation({'annotation': ClassificationAnnotation('identifier', 3)})]
    predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'a'}], None)

    with pytest.raises(ConfigError):
        dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

def test_classification_per_class_accuracy_fully_zero_prediction(self):
    annotation = ClassificationAnnotation('identifier', 0)
    prediction = ClassificationPrediction('identifier', [1.0, 2.0])
    dataset = DummyDataset(label_map={0: '0', 1: '1'})
    dispatcher = MetricsExecutor([{'type': 'accuracy_per_class', 'top_k': 1}], dataset)
    dispatcher.update_metrics_on_batch(range(1), [annotation], [prediction])

    for _, evaluation_result in dispatcher.iterate_metrics([annotation], [prediction]):
        assert evaluation_result.name == 'accuracy_per_class'
        assert len(evaluation_result.evaluated_value) == 2
        assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
        assert evaluation_result.evaluated_value[1] == pytest.approx(0.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.threshold is None

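# The per-class accuracy tests above use a DummyDataset test double defined elsewhere
# in the suite. The sketch below is only an illustrative assumption of the minimal
# interface those tests appear to rely on (a label_map exposed through dataset
# metadata); it is not the project's actual helper.
class DummyDataset:
    def __init__(self, label_map, bg=-1):
        self.label_map = label_map
        self.background = bg
        self.name = 'dummy'

    @property
    def metadata(self):
        # Per-class metrics read the label map (and optional background label)
        # from dataset metadata to size and name their result vectors.
        return {'label_map': self.label_map, 'background_label': self.background}

    @property
    def labels(self):
        return self.metadata['label_map']
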
def test_threshold_is_10_by_config(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 3, 'threshold': 10}], None)

    for _, evaluation_result in dispatcher.iterate_metrics([annotations], [predictions]):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == 0.0
        assert evaluation_result.reference_value is None
        assert evaluation_result.threshold == 10

def test_accuracy_with_unsupported_prediction_type_as_prediction_source_for_container_raises_config_error(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ContainerPrediction({'prediction': DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'prediction_source': 'prediction'}], None)

    with pytest.raises(ConfigError):
        dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

def test_accuracy_on_annotation_container_with_several_suitable_representations_config_value_error_exception(self):
    annotations = [ContainerAnnotation({
        'annotation1': ClassificationAnnotation('identifier', 3),
        'annotation2': ClassificationAnnotation('identifier', 3)
    })]
    predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)

    with pytest.raises(ConfigError):
        dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

def test_mae_on_interval_default_not_ignore_out_of_range(self):
    annotations = [
        RegressionAnnotation('identifier', -1),
        RegressionAnnotation('identifier', 2),
        RegressionAnnotation('identifier', 0.5)
    ]
    predictions = [
        RegressionPrediction('identifier', 1),
        RegressionPrediction('identifier', 2),
        RegressionPrediction('identifier', 1)
    ]
    config = [{'type': 'mae_on_interval', 'end': 1, 'ignore_values_not_in_interval': False}]
    expected = EvaluationResult(
        pytest.approx([2.0, 0.0, 0.5, 0.0, 0.0, 0.0]),
        None,
        'mae_on_interval',
        'mae_on_interval',
        None,
        None,
        {
            'postfix': ' ',
            'scale': 1,
            'names': [
                'mean: < 0.0', 'std: < 0.0',
                'mean: <= 0.0 < 1.0', 'std: <= 0.0 < 1.0',
                'mean: > 1.0', 'std: > 1.0'
            ],
            'calculate_mean': False,
            'target': 'higher-worse',
            'orig_names': [
                'mean: < 0.0', 'std: < 0.0',
                'mean: <= 0.0 < 1.0', 'std: <= 0.0 < 1.0',
                'mean: > 1.0', 'std: > 1.0'
            ]
        },
        None
    )
    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected

def test_mse_with_zero_diff_between_annotation_and_prediction(self):
    annotations = [RegressionAnnotation('identifier', 3)]
    predictions = [RegressionPrediction('identifier', 3)]
    config = [{'type': 'mse'}]
    expected = EvaluationResult(
        pytest.approx([0.0, 0.0]),
        None,
        'mse',
        'mse',
        None,
        None,
        {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False, 'target': 'higher-worse'},
        None
    )
    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected

def test_accuracy_with_unsupported_prediction_in_container_raise_config_error_exception(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ContainerPrediction({'prediction': DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
    config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
    dispatcher = MetricsExecutor(config, None)

    with pytest.raises(ConfigError):
        dispatcher.update_metrics_on_batch(annotations, predictions)

def test_config_vector_presenter(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
    config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1, 'presenter': 'print_vector'}]}
    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(annotations, predictions)

    for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
        assert isinstance(presenter, VectorPrintPresenter)

def from_configs(cls, config):
    dataset_config = config['datasets'][0]
    dataset = Dataset(dataset_config)
    data_reader_config = dataset_config.get('reader', 'opencv_imread')
    data_source = dataset_config['data_source']
    if isinstance(data_reader_config, str):
        reader = BaseReader.provide(data_reader_config, data_source)
    elif isinstance(data_reader_config, dict):
        reader = BaseReader.provide(data_reader_config['type'], data_source, data_reader_config)
    else:
        raise ConfigError('reader should be dict or string')
    preprocessing = PreprocessingExecutor(dataset_config.get('preprocessing', []), dataset.name)
    metrics_executor = MetricsExecutor(dataset_config['metrics'], dataset)
    launcher = create_launcher(config['launchers'][0], delayed_model_loading=True)
    model = SequentialModel(config.get('network_info', {}), launcher)

    return cls(dataset, reader, preprocessing, metrics_executor, launcher, model)

def test_classification_per_class_accuracy_prediction_top3(self):
    annotation = [
        ClassificationAnnotation('identifier_1', 1),
        ClassificationAnnotation('identifier_2', 1)
    ]
    prediction = [
        ClassificationPrediction('identifier_1', [1.0, 2.0, 3.0, 4.0]),
        ClassificationPrediction('identifier_2', [2.0, 1.0, 3.0, 4.0])
    ]
    config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy_per_class', 'top_k': 3}]}
    dataset = DummyDataset(label_map={0: '0', 1: '1', 2: '2', 3: '3'})
    dispatcher = MetricsExecutor(config, dataset)
    dispatcher.update_metrics_on_batch(annotation, prediction)

    for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
        assert evaluation_result.name == 'accuracy_per_class'
        assert len(evaluation_result.evaluated_value) == 4
        assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
        assert evaluation_result.evaluated_value[1] == pytest.approx(0.5)
        assert evaluation_result.evaluated_value[2] == pytest.approx(0.0)
        assert evaluation_result.evaluated_value[3] == pytest.approx(0.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.threshold is None

def test_classification_accuracy_result_for_batch_1_with_named_metric(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1, 'name': 'accuracy@top1'}], None)
    metric_result = dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)
    expected_metric_result = PerImageMetricResult('accuracy@top1', 'accuracy', 1.0, 'higher-better')

    assert len(metric_result) == 1
    assert 0 in metric_result
    assert len(metric_result[0]) == 1
    assert metric_result[0][0] == expected_metric_result

def test_mae_on_interval_default_all_missed(self):
    annotations = [RegressionAnnotation('identifier', -2)]
    predictions = [RegressionPrediction('identifier', 1)]
    config = [{'type': 'mae_on_interval', 'end': 1}]
    expected = EvaluationResult(
        pytest.approx([0.0]),
        None,
        'mae_on_interval',
        'mae_on_interval',
        None,
        {'postfix': ' ', 'scale': 1, 'names': [], 'calculate_mean': False}
    )
    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    with pytest.warns(UserWarning) as warnings:
        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
            assert len(warnings) == 1
            assert evaluation_result == expected

def test_zero_accuracy_top_3(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
    config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 3}]}
    dispatcher = MetricsExecutor(config, None)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == 0.0
        assert evaluation_result.reference_value is None
        assert evaluation_result.threshold is None

def test_complete_accuracy_with_container_default_sources(self):
    annotations = [ContainerAnnotation({'a': ClassificationAnnotation('identifier', 3)})]
    predictions = [ContainerPrediction({'p': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == pytest.approx(1.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.threshold is None

def test_mae_on_interval_with_repeated_values(self):
    annotations = [
        RegressionAnnotation('identifier', -1),
        RegressionAnnotation('identifier', 2),
        RegressionAnnotation('identifier', 1)
    ]
    predictions = [
        RegressionPrediction('identifier', 1),
        RegressionPrediction('identifier', 3),
        RegressionPrediction('identifier', 1)
    ]
    config = [{'type': 'mae_on_interval', 'intervals': [0.0, 2.0, 2.0, 4.0]}]
    expected = EvaluationResult(
        pytest.approx([0.0, 0.0, 1.0, 0.0]),
        None,
        'mae_on_interval',
        'mae_on_interval',
        None,
        {
            'postfix': ' ',
            'scale': 1,
            'names': [
                'mean: <= 0.0 < 2.0', 'std: <= 0.0 < 2.0',
                'mean: <= 2.0 < 4.0', 'std: <= 2.0 < 4.0'
            ],
            'calculate_mean': False,
            'target': 'higher-worse'
        }
    )
    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected

def test_complete_accuracy(self):
    annotations = [ClassificationAnnotation('identifier', 3)]
    predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
    config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result.name == 'accuracy'
        assert evaluation_result.evaluated_value == pytest.approx(1.0)
        assert evaluation_result.reference_value is None
        assert evaluation_result.threshold is None

def from_configs(cls, config):
    dataset_config = config['datasets'][0]
    dataset = Dataset(dataset_config)
    data_reader_config = dataset_config.get('reader', 'opencv_imread')
    data_source = dataset_config['data_source']
    if isinstance(data_reader_config, str):
        reader = BaseReader.provide(data_reader_config, data_source)
    elif isinstance(data_reader_config, dict):
        reader = BaseReader.provide(data_reader_config['type'], data_source, data_reader_config)
    else:
        raise ConfigError('reader should be dict or string')
    preprocessing = PreprocessingExecutor(dataset_config.get('preprocessing', []), dataset.name)
    metrics_executor = MetricsExecutor(dataset_config['metrics'], dataset)

    launcher_settings = config['launchers'][0]
    supported_frameworks = ['dlsdk']
    if not launcher_settings['framework'] in supported_frameworks:
        raise ConfigError('{} framework not supported'.format(launcher_settings['framework']))
    launcher = create_launcher(launcher_settings, delayed_model_loading=True)

    network_info = config.get('network_info', {})
    colorization_network = network_info.get('colorization_network', {})
    verification_network = network_info.get('verification_network', {})
    model_args = config.get('_models', [])
    models_is_blob = config.get('_model_is_blob')
    if 'model' not in colorization_network and model_args:
        colorization_network['model'] = model_args[0]
        colorization_network['_model_is_blob'] = models_is_blob
    if 'model' not in verification_network and model_args:
        verification_network['model'] = model_args[1 if len(model_args) > 1 else 0]
        verification_network['_model_is_blob'] = models_is_blob
    network_info.update({
        'colorization_network': colorization_network,
        'verification_network': verification_network
    })
    if not contains_all(network_info, ['colorization_network', 'verification_network']):
        raise ConfigError('configuration for colorization_network/verification_network does not exist')

    test_model = ColorizationTestModel(network_info['colorization_network'], launcher)
    check_model = ColorizationCheckModel(network_info['verification_network'], launcher)

    return cls(dataset, reader, preprocessing, metrics_executor, launcher, test_model, check_model)

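# For orientation, a minimal configuration shape accepted by from_configs above,
# sketched as a Python dict. The field names are taken from the code; the dataset
# values and model paths are placeholders, not real files.
EXAMPLE_COLORIZATION_CONFIG = {
    'launchers': [{'framework': 'dlsdk', 'device': 'CPU'}],
    'datasets': [{
        'name': 'dataset_name',
        'data_source': 'path/to/images',
        'reader': 'opencv_imread',  # or a dict such as {'type': 'opencv_imread'}
        'preprocessing': [],
        'metrics': [{'type': 'accuracy', 'top_k': 1}],
    }],
    'network_info': {
        # If 'model' is omitted here, from_configs falls back to the positional
        # '_models' list (and '_model_is_blob') from the top-level config.
        'colorization_network': {'model': 'colorization-model.xml'},
        'verification_network': {'model': 'verification-model.xml'},
    },
}
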
def test_mae_with_positive_diff_between_annotation_and_prediction(self):
    annotations = [
        RegressionAnnotation('identifier', 3),
        RegressionAnnotation('identifier2', 1)
    ]
    predictions = [
        RegressionPrediction('identifier', 1),
        RegressionPrediction('identifier2', -3)
    ]
    config = {'annotation': 'mocked', 'metrics': [{'type': 'mae'}]}
    expected = EvaluationResult(
        pytest.approx([3.0, 1.0]),
        None,
        'mae',
        None,
        {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
    )
    dispatcher = MetricsExecutor(config, None)
    dispatcher.update_metrics_on_batch(annotations, predictions)

    for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
        assert evaluation_result == expected

def test_classification_accuracy_result_for_batch_2(self):
    annotations = [
        ClassificationAnnotation('identifier', 3),
        ClassificationAnnotation('identifier1', 1)
    ]
    predictions = [
        ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0]),
        ClassificationPrediction('identifier2', [1.0, 1.0, 1.0, 4.0])
    ]
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
    metric_result = dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)
    expected_metric_result = [
        PerImageMetricResult('accuracy', 'accuracy', 1.0, 'higher-better'),
        PerImageMetricResult('accuracy', 'accuracy', 0.0, 'higher-better')
    ]

    assert len(metric_result) == 2
    assert 0 in metric_result
    assert len(metric_result[0]) == 1
    assert metric_result[0][0] == expected_metric_result[0]
    assert 1 in metric_result
    assert len(metric_result[1]) == 1
    assert metric_result[1][0] == expected_metric_result[1]

def test_missed_metrics_raises_config_error_exception_with_custom_name(self):
    config = {'name': 'some_name', 'annotation': 'custom'}

    with pytest.raises(ConfigError):
        MetricsExecutor(config, None)

def test_metrics_with_empty_entry_raises_config_error_exception(self):
    config = {'annotation': 'custom', 'metrics': [{}]}

    with pytest.raises(ConfigError):
        MetricsExecutor(config, None)

def test_undefined_metric_type_raises_config_error_exception(self):
    with pytest.raises(ConfigError):
        MetricsExecutor([{'type': ''}], None)

def test_accuracy_arguments(self):
    dispatcher = MetricsExecutor([{'type': 'accuracy', 'top_k': 1}], None)
    assert len(dispatcher.metrics) == 1
    _, _, accuracy_metric, _, _, _ = dispatcher.metrics[0]
    assert isinstance(accuracy_metric, ClassificationAccuracy)
    assert accuracy_metric.top_k == 1

def test_metrics_with_empty_entry_raises_config_error_exception(self):
    with pytest.raises(ConfigError):
        MetricsExecutor([{}], None)

def test_missed_metrics_raises_config_error_exception(self):
    with pytest.raises(ConfigError):
        MetricsExecutor([], None)

def test_undefined_metric_type_raises_config_error_exception(self):
    config = {'annotation': 'custom', 'metrics': [{'type': ''}]}

    with pytest.raises(ConfigError):
        MetricsExecutor(config, None)

def test_multi_class_update_metric_result(self):
    annotations = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), True)
    predictions = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), False)
    dispatcher = MetricsExecutor(create_config(self.name), multi_class_dataset())
    metric_result, _ = dispatcher.update_metrics_on_batch(range(len(annotations)), annotations, predictions)

    assert metric_result[0][0].result == 0.7

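# The segmentation test above relies on helpers (make_segmentation_representation,
# create_config, multi_class_dataset) defined elsewhere in the suite. A plausible
# minimal sketch of the representation helper is given below, assuming the standard
# SegmentationAnnotation/SegmentationPrediction types; treat it as illustrative only.
def make_segmentation_representation(mask, ground_truth=False):
    if ground_truth:
        # Ground-truth masks are attached to an annotation object.
        annotation = SegmentationAnnotation('identifier', None)
        annotation.mask = mask
        return [annotation]
    # Predicted masks are wrapped into a prediction object directly.
    return [SegmentationPrediction('identifier', mask)]
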
def test_accuracy_with_several_prediction_source_raises_value_error_exception(self):
    config = {
        'annotation': 'custom',
        'metrics': [{'type': 'accuracy', 'top_k': 1, 'prediction_source': 'prediction1, prediction2'}]
    }

    with pytest.raises(ConfigError):
        MetricsExecutor(config, None)