Пример #1
0
def run_3dunet_accuracy(engine_file, batch_size, num_images, verbose=False):
    """Run the 3D-UNet engine over the BraTS validation images.

    Loads the preprocessed ``.npy`` volumes named in
    ``data_maps/brats/val_map.txt``, feeds them through the engine in batches
    and prints mean/std statistics of the first input and first output of every
    batch. No real accuracy metric is computed yet.

    Args:
        engine_file: Path of the serialized engine handed to ``EngineRunner``.
        batch_size: Number of images per inference call.
        num_images: Number of images to run; ``None`` selects the default of 67.
        verbose: When true, log the run configuration and per-batch timing.

    Returns:
        Always ``1.0`` (placeholder until accuracy checking is implemented).

    Raises:
        ValueError: If the engine's input dtype/format pair has no matching
            preprocessed-data directory.
    """
    runner = EngineRunner(engine_file, verbose=verbose)
    input_dtype, input_format = get_input_format(runner.engine)

    if verbose:
        logging.info("Running UNET accuracy test with:")
        logging.info("    engine_file: {:}".format(engine_file))
        logging.info("    batch_size: {:}".format(batch_size))
        logging.info("    num_images: {:}".format(num_images))
        logging.info("    input_dtype: {:}".format(input_dtype))
        logging.info("    input_format: {:}".format(input_format))

    # Map the engine's input dtype/format to the preprocessed-data sub-directory.
    # Start from None so an unsupported pair is reported explicitly instead of
    # surfacing later as an unbound local variable.
    format_string = None
    if input_dtype == trt.DataType.FLOAT:
        format_string = "fp32"
    elif input_dtype == trt.DataType.INT8:
        if input_format == trt.TensorFormat.LINEAR:
            format_string = "int8_linear"
    elif input_dtype == trt.DataType.HALF:
        if input_format == trt.TensorFormat.LINEAR:
            format_string = "fp16_linear"
        elif input_format == trt.TensorFormat.DHWC8:
            format_string = "fp16_dhwc8"
    if format_string is None:
        raise ValueError(
            "Unsupported input dtype/format combination: {:} / {:}".format(input_dtype, input_format))
    image_dir = os.path.join(os.getenv("PREPROCESSED_DATA_DIR", "build/preprocessed_data"),
            "brats", "brats_npy", format_string)

    if num_images is None:
        num_images = 67

    # The first whitespace-separated token of every non-blank line is the image name.
    with open("data_maps/brats/val_map.txt") as f:
        image_list = [line.split()[0] for line in f if line.strip()]

    # Never ask for more images than the map provides; otherwise the last
    # batches would be empty or smaller than the batch size given to the runner.
    if num_images > len(image_list):
        logging.warning("Requested {:} images but only {:} are listed; using {:}.".format(
            num_images, len(image_list), len(image_list)))
        num_images = len(image_list)

    # Collected for a future accuracy computation; currently not evaluated.
    predictions = []
    for batch_idx, image_idx in enumerate(range(0, num_images, batch_size)):
        # The final batch may be partial.
        actual_batch_size = min(batch_size, num_images - image_idx)
        batch_names = image_list[image_idx:image_idx + actual_batch_size]
        batch_images = np.ascontiguousarray(
            np.stack([np.load(os.path.join(image_dir, name + ".npy")) for name in batch_names]))

        start_time = time.time()
        outputs = runner([batch_images], actual_batch_size)
        # Take the timestamp right away so the statistics below are not billed
        # as inference time.
        elapsed = time.time() - start_time

        print(np.mean(batch_images[0].astype(np.float32)))
        print(np.std(batch_images[0].astype(np.float32)))
        print(np.mean(outputs[0].astype(np.float32)))
        print(np.std(outputs[0].astype(np.float32)))

        if verbose:
            logging.info("Batch {:d} (Size {:}) >> Inference time: {:f}".format(batch_idx, actual_batch_size, elapsed))

        predictions.extend(outputs[0][:actual_batch_size])

    logging.warning("3D-Unet standalone accuracy checker does not have accuracy checking yet! Always return 1.0")

    return 1.0
Пример #2
0
    def postprocess_2(self):
        """Configure the network input and replace all outputs with a top-1 index.

        Applies the configured input dtype/format to the first network input,
        logs every layer together with its device type, unmarks every existing
        output, then appends a TopK layer (``MAX``, arguments ``1, 2``) to the
        first output of the last pre-existing layer and marks the TopK index
        tensor as the only network output.

        Raises:
            Exception: If the network does not end up with exactly one output.
        """
        network = self.network

        # Input dtype: int8 inputs get a fixed dynamic range.
        net_input = network.get_input(0)
        if self.input_dtype == "int8":
            net_input.dtype = trt.int8
            net_input.dynamic_range = (-128, 127)

        # Input format: restrict the allowed formats to the requested one.
        requested_format = self.input_format
        if requested_format == "linear":
            net_input.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif requested_format == "chw4":
            net_input.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

        # Dump the layer list for inspection.
        layer_count = network.num_layers
        logging.debug(layer_count)
        layer_template = "({:}) Layer '{:}' -> Type: {:} ON {:}"
        for index in range(layer_count):
            current = network.get_layer(index)
            logging.info(layer_template.format(
                index, current.name, current.type,
                self.builder_config.get_device_type(current)))

        # Drop every existing output; the TopK index becomes the sole output.
        while network.num_outputs > 0:
            stale_output = network.get_output(0)
            logging.info("Unmarking output: {:}".format(stale_output.name))
            network.unmark_output(stale_output)

        # Append TopK after the last layer that existed before this call.
        final_layer = network.get_layer(layer_count - 1)
        topk = network.add_topk(final_layer.get_output(0),
                                trt.TopKOperation.MAX, 1, 2)
        topk.name = "topk_layer"
        topk.get_output(0).name = "topk_layer_output_value"
        topk.get_output(1).name = "topk_layer_output_index"
        network.mark_output(topk.get_output(1))

        if network.num_outputs == 1:
            return
        logging.warning(
            "num outputs should be 1 after unmarking! Has {:}".format(
                network.num_outputs))
        raise Exception
Пример #3
0
    def postprocess(self, useConvForFC=False):
        """Rewrite the tail of the network into pooling -> FC -> TopK.

        Applies the configured input dtype/format, locates the layer whose name
        contains ``"MatMul"`` (the last such layer wins), and takes its kernel
        and bias with the first of the 1001 rows/entries dropped. All existing
        outputs are unmarked and a new tail is appended to the output of the
        activation layer 14 positions before the MatMul layer: a 7x7 average
        pooling, a fully connected layer (or a 1x1 convolution when
        ``useConvForFC`` is true) and a TopK layer whose index tensor becomes
        the only network output.

        Args:
            useConvForFC: Build the replacement FC as a 1x1 convolution instead
                of a fully connected layer.

        Raises:
            RuntimeError: If no layer name contains ``"MatMul"``, or if the
                network does not end up with exactly one output.
            AssertionError: If the layers around the MatMul layer do not have
                the expected types.
        """
        # Set input dtype and format
        input_tensor = self.network.get_input(0)
        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
            input_tensor.dynamic_range = (-128, 127)
        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "chw4":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

        # Pre-bind the values the loop is expected to find so that a network
        # without a MatMul layer is reported clearly further down.
        fc_kernel = None
        fc_bias = None
        last_conv_layer = None

        # Get the layers we care about.
        nb_layers = self.network.num_layers
        logging.debug(nb_layers)
        for i in range(nb_layers):
            layer = self.network.get_layer(i)
            logging.info("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
                i, layer.name, layer.type,
                self.builder_config.get_device_type(layer)))

            # Detect the FC layer.
            if "MatMul" in layer.name:
                fc_layer = layer
                assert fc_layer.type == trt.LayerType.FULLY_CONNECTED
                # Re-tag the generic layer so the FC-specific attributes are reachable.
                fc_layer.__class__ = trt.IFullyConnectedLayer
                # Drop the first of the 1001 rows/entries
                # (presumably a background class — TODO confirm).
                fc_kernel = fc_layer.kernel.reshape(1001, 2048)[1:, :]
                fc_bias = fc_layer.bias[1:]

                # (i-13)th layer should be reduction.
                # It is only validated here; the new pooling layer replaces it.
                reduce_layer = self.network.get_layer(i - 13)
                assert reduce_layer.type == trt.LayerType.REDUCE
                reduce_layer.__class__ = trt.IReduceLayer

                # (i-14)th layer should be the last ReLU
                last_conv_layer = self.network.get_layer(i - 14)
                assert last_conv_layer.type == trt.LayerType.ACTIVATION
                last_conv_layer.__class__ = trt.IActivationLayer

        if last_conv_layer is None:
            raise RuntimeError(
                "No layer with 'MatMul' in its name was found; cannot rebuild "
                "the classification head.")

        # Unmark the old output since we are going to add new layers for the final part of the network.
        while self.network.num_outputs > 0:
            logging.info("Unmarking output: {:}".format(
                self.network.get_output(0).name))
            self.network.unmark_output(self.network.get_output(0))

        # Replace the reduce layer with pooling layer
        pool_layer_new = self.network.add_pooling(
            last_conv_layer.get_output(0), trt.PoolingType.AVERAGE, (7, 7))
        pool_layer_new.name = "squeeze_replaced"
        pool_layer_new.get_output(0).name = "squeeze_replaced_output"

        # Add fc layer
        fc_kernel = fc_kernel.flatten()
        if useConvForFC:
            fc_layer_new = self.network.add_convolution(
                pool_layer_new.get_output(0), fc_bias.size, (1, 1), fc_kernel,
                fc_bias)
        else:
            fc_layer_new = self.network.add_fully_connected(
                pool_layer_new.get_output(0), fc_bias.size, fc_kernel, fc_bias)
        fc_layer_new.name = "fc_replaced"
        fc_layer_new.get_output(0).name = "fc_replaced_output"

        # Add topK layer.
        topk_layer = self.network.add_topk(fc_layer_new.get_output(0),
                                           trt.TopKOperation.MAX, 1, 2)
        topk_layer.name = "topk_layer"
        topk_layer.get_output(0).name = "topk_layer_output_value"
        topk_layer.get_output(1).name = "topk_layer_output_index"

        # Mark the new output.
        self.network.mark_output(topk_layer.get_output(1))

        if self.network.num_outputs != 1:
            message = "num outputs should be 1 after unmarking! Has {:}".format(
                self.network.num_outputs)
            logging.warning(message)
            # RuntimeError is still an Exception, so existing handlers keep working.
            raise RuntimeError(message)
Пример #4
0
    def initialize(self):
        """
        Build the TRT network from the ONNX model and prepare its input/output.

        Records the system and its architecture, creates an explicit-batch
        network, parses the ONNX file at ``self.model_path`` into it, applies
        the configured input dtype/format, replaces all outputs with the index
        output of a TopK layer appended to the last parsed layer, and runs
        ``autosinian_optimize()`` for int8 precision with batch size > 1 when no
        calibration is needed. Sets ``self.initialized`` on success.

        Raises:
            RuntimeError: If the ONNX parser rejects the model.
            Exception: If the network does not end up with exactly one output.
        """

        # Query system id for architecture
        self.system = get_system()
        self.gpu_arch = self.system.arch

        # Create an explicit-batch network.
        explicit_batch_flag = 1 << int(
            trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        self.network = self.builder.create_network(explicit_batch_flag)
        network = self.network

        # Parse the serialized ONNX model into the network.
        onnx_parser = trt.OnnxParser(network, self.logger)
        with open(self.model_path, "rb") as model_file:
            serialized_model = model_file.read()
        if not onnx_parser.parse(serialized_model):
            raise RuntimeError(
                "ofa_autusinian onnx model processing failed! Error: {:}".
                format(onnx_parser.get_error(0).desc()))

        # Input dtype: the int8 range is +/-127 times a scale decoded from a
        # big-endian float32 bit pattern.
        net_input = network.get_input(0)
        if self.input_dtype == "int8":
            net_input.dtype = trt.int8
            scale = struct.unpack('!f', bytes.fromhex('3caa5293'))[0]
            bound = scale * 127.0
            net_input.dynamic_range = (-bound, bound)

        # Input format: restrict the allowed formats to the requested one.
        requested_format = self.input_format
        if requested_format == "linear":
            net_input.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif requested_format == "chw4":
            net_input.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

        # Remember how many layers the parser produced before adding new ones.
        parsed_layer_count = network.num_layers

        # Drop every existing output; the TopK index becomes the sole output.
        while network.num_outputs > 0:
            stale_output = network.get_output(0)
            logging.info("Unmarking output: {:}".format(stale_output.name))
            network.unmark_output(stale_output)

        # Append TopK after the last parsed layer.
        final_layer = network.get_layer(parsed_layer_count - 1)
        topk = network.add_topk(final_layer.get_output(0),
                                trt.TopKOperation.MAX, 1, 2)
        topk.name = "topk_layer"
        topk.get_output(0).name = "topk_layer_output_value"
        topk.get_output(1).name = "topk_layer_output_index"
        network.mark_output(topk.get_output(1))

        if network.num_outputs != 1:
            logging.warning(
                "num outputs should be 1 after unmarking! Has {:}".format(
                    network.num_outputs))
            raise Exception

        wants_optimization = (self.precision == "int8"
                              and self.batch_size > 1
                              and not self.need_calibration)
        if wants_optimization:
            self.autosinian_optimize()

        self.initialized = True
Пример #5
0
def check_accuracy(log_file, config, is_compliance=False):
    """Run the benchmark's accuracy script on a log and compare with the target.

    Builds the command line of the benchmark-specific accuracy script, runs it
    through ``run_command``, writes its output to ``accuracy.txt`` next to
    ``log_file`` and takes the accuracy from the first output line matching the
    benchmark's pattern in the submission checker's ``ACC_PATTERN``.

    Args:
        log_file: Path of the accuracy JSON log.
        config: Mapping that provides ``"benchmark"`` and ``"accuracy_level"``
            (a percentage string with a one-character suffix, which is
            stripped) and, for BERT and UNET, ``"precision"``.
        is_compliance: When true, never raise on a failed check and return the
            raw accuracy value instead of a message.

    Returns:
        A status string when the log file is missing or holds no results; the
        parsed accuracy (``None`` if no line matched) when ``is_compliance`` is
        true; otherwise a summary string for a passed check.

    Raises:
        ValueError: If the benchmark is unknown.
        RuntimeError: If the check fails and ``is_compliance`` is false.
    """
    benchmark_name = config["benchmark"]

    accuracy_targets = {
        BENCHMARKS.ResNet50: 76.46,
        BENCHMARKS.SSDResNet34: 20.0,
        BENCHMARKS.SSDMobileNet: 22.0,
        BENCHMARKS.BERT: 90.874,
        BENCHMARKS.DLRM: 80.25,
        BENCHMARKS.RNNT: 100.0 - 7.45225,
        BENCHMARKS.UNET: 0.853
    }
    threshold_ratio = float(config["accuracy_level"][:-1]) / 100

    if not os.path.exists(log_file):
        return "Cannot find accuracy JSON file."
    # checking if log_file is empty by just reading first several bytes
    # indeed, first 4B~6B is likely all we need to check: '', '[]', '[]\r', '[\n]\n', '[\r\n]\r\n', ...
    # but checking 8B for safety
    with open(log_file, 'r') as lf:
        first_8B = lf.read(8)
        if not first_8B or ('[' in first_8B and ']' in first_8B):
            return "No accuracy results in PerformanceOnly mode."

    dtype_expand_map = {"fp16": "float16", "fp32": "float32", "int8": "float16"} # Use FP16 output for INT8 mode
    accuracy_regex_map = import_module("build.inference.tools.submission.submission-checker").ACC_PATTERN

    # Report an unknown benchmark with the intended ValueError instead of the
    # KeyError the target lookup below would raise first.
    if benchmark_name not in accuracy_targets:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))
    threshold = accuracy_targets[benchmark_name] * threshold_ratio
    if benchmark_name in [BENCHMARKS.ResNet50]:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-imagenet.py --mlperf-accuracy-file {:} \
            --imagenet-val-file data_maps/imagenet/val_map.txt --dtype int32 ".format(log_file)
        regex = accuracy_regex_map["acc"]
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-resnet34-results.json --use-inv-map".format(
            log_file, os.path.join(os.environ.get("PREPROCESSED_DATA_DIR", "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-mobilenet-results.json".format(
            log_file, os.path.join(os.environ.get("PREPROCESSED_DATA_DIR", "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.BERT:
        # Having issue installing tokenizers on Xavier...
        if is_xavier():
            cmd = "python3 code/bert/tensorrt/accuracy-bert.py --mlperf-accuracy-file {:} --squad-val-file {:}".format(
                log_file, os.path.join(os.environ.get("DATA_DIR", "build/data"), "squad", "dev-v1.1.json"))
        else:
            dtype = config["precision"].lower()
            if dtype in dtype_expand_map:
                dtype = dtype_expand_map[dtype]
            val_data_path = os.path.join(
                os.environ.get("DATA_DIR", "build/data"),
                "squad", "dev-v1.1.json")
            vocab_file_path = "build/models/bert/vocab.txt"
            output_prediction_path = os.path.join(os.path.dirname(log_file), "predictions.json")
            cmd = "python3 build/inference/language/bert/accuracy-squad.py " \
                "--log_file {:} --vocab_file {:} --val_data {:} --out_file {:} " \
                "--output_dtype {:}".format(log_file, vocab_file_path, val_data_path, output_prediction_path, dtype)
        regex = accuracy_regex_map["F1"]
    elif benchmark_name == BENCHMARKS.DLRM:
        cmd = "python3 build/inference/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py --mlperf-accuracy-file {:} " \
              "--day-23-file build/data/criteo/day_23 --aggregation-trace-file " \
              "build/preprocessed_data/criteo/full_recalib/sample_partition_trace.txt".format(log_file)
        regex = accuracy_regex_map["AUC"]
    elif benchmark_name == BENCHMARKS.RNNT:
        # Having issue installing librosa on Xavier...
        if is_xavier():
            cmd = "python3 code/rnnt/tensorrt/accuracy.py --loadgen_log {:}".format(log_file)
        else:
            # RNNT output indices are in INT8
            cmd = "python3 build/inference/speech_recognition/rnnt/accuracy_eval.py " \
                "--log_dir {:} --dataset_dir build/preprocessed_data/LibriSpeech/dev-clean-wav " \
                "--manifest build/preprocessed_data/LibriSpeech/dev-clean-wav.json " \
                "--output_dtype int8".format(os.path.dirname(log_file))
        regex = accuracy_regex_map["WER"]
    elif benchmark_name == BENCHMARKS.UNET:
        postprocess_dir = "build/brats_postprocessed_data"
        os.makedirs(postprocess_dir, exist_ok=True)
        dtype = config["precision"].lower()
        if dtype in dtype_expand_map:
            dtype = dtype_expand_map[dtype]
        cmd = "python3 build/inference/vision/medical_imaging/3d-unet/accuracy-brats.py --log_file {:} " \
            "--output_dtype {:} --preprocessed_data_dir build/preprocessed_data/brats/brats_reference_preprocessed " \
            "--postprocessed_data_dir {:} " \
            "--label_data_dir build/preprocessed_data/brats/brats_reference_raw/Task043_BraTS2019/labelsTr".format(log_file, dtype, postprocess_dir)
        regex = accuracy_regex_map["DICE"]
        # Having issue installing nnUnet on Xavier...
        if is_xavier():
            logging.warning(
                "Accuracy checking for 3DUnet is not supported on Xavier. Please run the following command on desktop:\n{:}".format(cmd))
            # NOTE(review): this substitutes a perfect score instead of measuring one.
            cmd = 'echo "Accuracy: mean = 1.0000, whole tumor = 1.0000, tumor core = 1.0000, enhancing tumor = 1.0000"'
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    output = run_command(cmd, get_output=True)
    result_regex = re.compile(regex)
    accuracy = None
    # Keep the script's full output next to the log for later inspection.
    with open(os.path.join(os.path.dirname(log_file), "accuracy.txt"), "w") as f:
        for line in output:
            print(line, file=f)
    # The first line matching the benchmark's pattern carries the accuracy.
    for line in output:
        result_match = result_regex.match(line)
        if result_match is not None:
            accuracy = float(result_match.group(1))
            break

    accuracy_result = "PASSED" if accuracy is not None and accuracy >= threshold else "FAILED"
    # The accuracy stays None when no output line matched; formatting None with
    # "{:.3f}" would raise TypeError and hide the real failure.
    accuracy_text = "N/A" if accuracy is None else "{:.3f}".format(accuracy)

    if accuracy_result == "FAILED" and not is_compliance:
        raise RuntimeError(
            "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}!".format(
                accuracy_text, threshold, accuracy_result))

    if is_compliance:
        return accuracy  # Needed for numerical comparison

    return "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}.".format(
        accuracy_text, threshold, accuracy_result)
Пример #6
0
def check_accuracy(log_file, config, is_compliance=False):
    """Check accuracy of given benchmark.

    Builds the command line of the benchmark-specific accuracy script, runs it
    (for 3D-UNet on Xavier: on a remote node, falling back to a placeholder
    result if that fails), writes the output to ``accuracy.txt`` next to
    ``log_file`` and takes the accuracy from the first output line matching the
    benchmark's pattern in the submission checker's ``ACC_PATTERN``.

    Args:
        log_file: Path of the accuracy JSON log.
        config: Mapping that provides ``"benchmark"`` and ``"accuracy_level"``
            (a percentage string with a one-character suffix, which is
            stripped), ``"precision"`` for BERT and UNET, and ``"config_name"``
            for BERT.
        is_compliance: When true, never raise on a failed check and return the
            raw accuracy value instead of a message.

    Returns:
        A status string when the log file is missing or holds no results; the
        parsed accuracy (``None`` if no line matched) when ``is_compliance`` is
        true; otherwise a summary string for a passed check.

    Raises:
        ValueError: If the benchmark is unknown.
        RuntimeError: If the check fails and ``is_compliance`` is false.
    """

    benchmark_name = config["benchmark"]

    accuracy_targets = {
        BENCHMARKS.BERT: 90.874,
        BENCHMARKS.DLRM: 80.25,
        BENCHMARKS.RNNT: 100.0 - 7.45225,
        BENCHMARKS.ResNet50: 76.46,
        BENCHMARKS.SSDMobileNet: 22.0,
        BENCHMARKS.SSDResNet34: 20.0,
        BENCHMARKS.UNET: 0.853,
    }
    threshold_ratio = float(config["accuracy_level"][:-1]) / 100

    if not os.path.exists(log_file):
        return "Cannot find accuracy JSON file."

    # checking if log_file is empty by just reading first several bytes
    # indeed, first 4B~6B is likely all we need to check: '', '[]', '[]\r', '[\n]\n', '[\r\n]\r\n', ...
    # but checking 8B for safety
    with open(log_file, 'r') as lf:
        first_8B = lf.read(8)
        if not first_8B or ('[' in first_8B and ']' in first_8B):
            return "No accuracy results in PerformanceOnly mode."

    dtype_expand_map = {
        "fp16": "float16",
        "fp32": "float32",
        "int8": "float16"
    }  # Use FP16 output for INT8 mode

    # Since submission-checker uses a relative import, but we are running from main.py, we need to surface its directory
    # into sys.path so it can successfully import it.
    # Insert into index 1 so that current working directory still takes precedence.
    # Only insert once so repeated calls do not keep growing sys.path.
    submission_dir = os.path.join(os.getcwd(), "build", "inference", "tools", "submission")
    if submission_dir not in sys.path:
        sys.path.insert(1, submission_dir)
    accuracy_regex_map = import_module("submission-checker").ACC_PATTERN

    # Report an unknown benchmark with the intended ValueError instead of the
    # KeyError the target lookup below would raise first.
    if benchmark_name not in accuracy_targets:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))
    threshold = accuracy_targets[benchmark_name] * threshold_ratio

    # Every benchmark has its own accuracy script. Prepare commandline with args to the script.
    skip_run_command = False
    if benchmark_name in [BENCHMARKS.ResNet50]:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-imagenet.py --mlperf-accuracy-file {:} \
            --imagenet-val-file data_maps/imagenet/val_map.txt --dtype int32 ".format(
            log_file)
        regex = accuracy_regex_map["acc"]
    elif benchmark_name == BENCHMARKS.SSDResNet34:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-resnet34-results.json --use-inv-map".format(
            log_file,
            os.path.join(
                os.environ.get("PREPROCESSED_DATA_DIR",
                               "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.SSDMobileNet:
        cmd = "python3 build/inference/vision/classification_and_detection/tools/accuracy-coco.py --mlperf-accuracy-file {:} \
            --coco-dir {:} --output-file build/ssd-mobilenet-results.json".format(
            log_file,
            os.path.join(
                os.environ.get("PREPROCESSED_DATA_DIR",
                               "build/preprocessed_data"), "coco"))
        regex = accuracy_regex_map["mAP"]
    elif benchmark_name == BENCHMARKS.BERT:
        # Having issue installing tokenizers on Xavier...
        if is_xavier():
            cmd = "python3 code/bert/tensorrt/accuracy-bert.py --mlperf-accuracy-file {:} --squad-val-file {:}".format(
                log_file,
                os.path.join(os.environ.get("DATA_DIR", "build/data"), "squad",
                             "dev-v1.1.json"))
        else:
            dtype = config["precision"].lower()
            if dtype in dtype_expand_map:
                dtype = dtype_expand_map[dtype]
            val_data_path = os.path.join(
                os.environ.get("DATA_DIR", "build/data"), "squad",
                "dev-v1.1.json")
            vocab_file_path = "build/models/bert/vocab.txt"
            if 'CPU' in config['config_name']:
                vocab_file_path = "build/data/squad/vocab.txt"
            output_prediction_path = os.path.join(os.path.dirname(log_file),
                                                  "predictions.json")
            cmd = "python3 build/inference/language/bert/accuracy-squad.py " \
                "--log_file {:} --vocab_file {:} --val_data {:} --out_file {:} " \
                "--output_dtype {:}".format(log_file, vocab_file_path, val_data_path, output_prediction_path, dtype)
        regex = accuracy_regex_map["F1"]
    elif benchmark_name == BENCHMARKS.DLRM:
        cmd = "python3 build/inference/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py --mlperf-accuracy-file {:} " \
              "--day-23-file build/data/criteo/day_23 --aggregation-trace-file " \
              "build/preprocessed_data/criteo/full_recalib/sample_partition_trace.txt".format(log_file)
        regex = accuracy_regex_map["AUC"]
    elif benchmark_name == BENCHMARKS.RNNT:
        # Having issue installing librosa on Xavier...
        if is_xavier():
            cmd = "python3 code/rnnt/tensorrt/accuracy.py --loadgen_log {:}".format(
                log_file)
        else:
            # RNNT output indices are in INT8
            cmd = "python3 build/inference/speech_recognition/rnnt/accuracy_eval.py " \
                "--log_dir {:} --dataset_dir build/preprocessed_data/LibriSpeech/dev-clean-wav " \
                "--manifest build/preprocessed_data/LibriSpeech/dev-clean-wav.json " \
                "--output_dtype int8".format(os.path.dirname(log_file))
        regex = accuracy_regex_map["WER"]
    elif benchmark_name == BENCHMARKS.UNET:
        postprocess_dir = "build/brats_postprocessed_data"
        os.makedirs(postprocess_dir, exist_ok=True)
        dtype = config["precision"].lower()
        if dtype in dtype_expand_map:
            dtype = dtype_expand_map[dtype]
        cmd = "python3 build/inference/vision/medical_imaging/3d-unet/accuracy-brats.py --log_file {:} " \
            "--output_dtype {:} --preprocessed_data_dir build/preprocessed_data/brats/brats_reference_preprocessed " \
            "--postprocessed_data_dir {:} " \
            "--label_data_dir build/preprocessed_data/brats/brats_reference_raw/Task043_BraTS2019/labelsTr".format(log_file, dtype, postprocess_dir)
        regex = accuracy_regex_map["DICE"]
        # Having issue installing nnUnet on Xavier...
        if is_xavier():
            # Internally, run on another node to process the accuracy.
            try:
                cmd = cmd.replace(os.getcwd(), ".", 1)
                temp_cmd = "ssh -oBatchMode=yes computelab-frontend-02 \"timeout 1200 srun --gres=gpu:ga100:1 -t 20:00 " \
                    "bash -c 'cd {:} && make prebuild DOCKER_COMMAND=\\\"{:}\\\"'\"".format(os.getcwd(), cmd)
                full_output = run_command(temp_cmd, get_output=True)
                # Keep only the span between the nnUNet banner and the final "Done!".
                start_line_idx = -1
                end_line_idx = -1
                for (line_idx, line) in enumerate(full_output):
                    if "Please cite the following paper when using nnUNet:" in line:
                        start_line_idx = line_idx
                    if "Done!" in line:
                        end_line_idx = line_idx
                # Explicit raise rather than assert: asserts vanish under -O,
                # which would silently yield an empty output slice here.
                if start_line_idx == -1 or end_line_idx == -1:
                    raise RuntimeError("Failed in accuracy checking")
                output = full_output[start_line_idx:end_line_idx + 1]
                skip_run_command = True
            except Exception as e:
                # Deliberate best-effort: any failure of the remote run falls
                # back to a placeholder result, but the reason is recorded.
                logging.warning("Remote 3DUnet accuracy check failed: {:}".format(e))
                logging.warning(
                    "Accuracy checking for 3DUnet is not supported on Xavier. Please run the following command on desktop:\n{:}"
                    .format(cmd))
                # NOTE(review): this substitutes a perfect score instead of measuring one.
                output = [
                    "Accuracy: mean = 1.0000, whole tumor = 1.0000, tumor core = 1.0000, enhancing tumor = 1.0000"
                ]
                skip_run_command = True
    else:
        raise ValueError("Unknown benchmark: {:}".format(benchmark_name))

    # Run benchmark's accuracy script and parse output for result.
    if not skip_run_command:
        output = run_command(cmd, get_output=True)
    result_regex = re.compile(regex)
    accuracy = None
    # Keep the script's full output next to the log for later inspection.
    with open(os.path.join(os.path.dirname(log_file), "accuracy.txt"),
              "w") as f:
        for line in output:
            print(line, file=f)
    # The first line matching the benchmark's pattern carries the accuracy.
    for line in output:
        result_match = result_regex.match(line)
        if result_match is not None:
            accuracy = float(result_match.group(1))
            break

    accuracy_result = "PASSED" if accuracy is not None and accuracy >= threshold else "FAILED"
    # The accuracy stays None when no output line matched; formatting None with
    # "{:.3f}" would raise TypeError and hide the real failure.
    accuracy_text = "N/A" if accuracy is None else "{:.3f}".format(accuracy)

    if accuracy_result == "FAILED" and not is_compliance:
        raise RuntimeError(
            "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}!".format(
                accuracy_text, threshold, accuracy_result))

    if is_compliance:
        return accuracy  # Needed for numerical comparison

    return "Accuracy = {:}, Threshold = {:.3f}. Accuracy test {:}.".format(
        accuracy_text, threshold, accuracy_result)