Пример #1
0
 def is_not_nan(output):
     """
     Reports whether ``output`` is free of NaN values.

     Args:
         output: The value handed to ``np.isnan``. Presumably a NumPy array - TODO confirm.

     Returns:
         bool: True when ``np.isnan`` flags no element. False otherwise, in which case
                 errors are logged and the NaN mask is printed at extra-verbose verbosity.
     """
     nan_mask = np.isnan(output)
     if not np.any(nan_mask):
         return True

     G_LOGGER.error("Encountered one or more NaNs")
     G_LOGGER.error(
         "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display locations of NaNs",
         mode=LogMode.ONCE)
     G_LOGGER.extra_verbose("Note: NaNs at:\n{:}".format(nan_mask))
     return False
Пример #2
0
 def execute_runner_with_queue(runner_queue, runner, loader_cache):
     """
     Runs ``execute_runner`` and reports the outcome over ``runner_queue``.

     Two items are sent on the queue, in this order:
         1. The iteration results, or None if ``execute_runner`` raised.
         2. ``loader_cache``, so the caller receives the updated cache.

     Args:
         runner_queue: The queue passed to ``misc.try_send_on_queue``.
         runner: The runner forwarded to ``execute_runner``.
         loader_cache: The cache forwarded to ``execute_runner`` and then sent back.
     """
     try:
         iteration_results = execute_runner(runner, loader_cache)
     except BaseException:
         # The exception itself is not sent back, since it is not necessarily pickleable.
         # Instead, the traceback is logged here and None is reported as the result.
         import traceback
         iteration_results = None
         G_LOGGER.error(traceback.format_exc())

     # Results go first; the updated loader_cache follows once execution has finished.
     for payload in (iteration_results, loader_cache):
         misc.try_send_on_queue(runner_queue, payload)
Пример #3
0
 def is_finite(output):
     """
     Reports whether every element of ``output`` is finite according to ``np.isfinite``.

     Args:
         output: The value handed to ``np.isfinite``. It is also indexed with the resulting
                 boolean mask when logging, so presumably a NumPy array - TODO confirm.

     Returns:
         bool: True when no element is flagged as non-finite. False otherwise, in which case
                 errors are logged and the offending mask/values are printed at extra-verbose verbosity.
     """
     bad_mask = np.logical_not(np.isfinite(output))
     if not np.any(bad_mask):
         return True

     G_LOGGER.error("Encountered one or more non-finite values")
     G_LOGGER.error(
         "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display non-finite values",
         mode=LogMode.ONCE)
     G_LOGGER.extra_verbose(
         "Note: non-finite values at:\n{:}".format(bad_mask))
     bad_values = output[bad_mask]
     G_LOGGER.extra_verbose("Note: non-finite values:\n{:}".format(bad_values))
     return False
Пример #4
0
    def validate(run_results,
                 check_finite=None,
                 check_nan=None,
                 fail_fast=None):
        """
        Scans every output of every iteration for invalid values.

        Args:
            run_results:
                    An iterable of (runner_name, results) pairs. Each element of ``results``
                    must provide ``items()`` yielding (output_name, output) pairs.
                    Presumably the result of Comparator.run() - TODO confirm.
            check_finite (bool): Whether to fail on non-finite values. Defaults to False.
            check_nan (bool): Whether to fail on NaNs. Defaults to True.
            fail_fast (bool): Whether to stop at the first invalid output. Defaults to False.

        Returns:
            bool: True if all outputs were valid, False otherwise.
        """
        check_finite = misc.default_value(check_finite, False)
        check_nan = misc.default_value(check_nan, True)
        fail_fast = misc.default_value(fail_fast, False)

        def has_no_nans(output):
            nan_mask = np.isnan(output)
            if not np.any(nan_mask):
                return True
            G_LOGGER.error("Encountered one or more NaNs")
            G_LOGGER.error(
                "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display locations of NaNs",
                mode=LogMode.ONCE)
            G_LOGGER.extra_verbose("Note: NaNs at:\n{:}".format(nan_mask))
            return False

        def has_only_finite(output):
            bad_mask = np.logical_not(np.isfinite(output))
            if not np.any(bad_mask):
                return True
            G_LOGGER.error("Encountered one or more non-finite values")
            G_LOGGER.error(
                "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display non-finite values",
                mode=LogMode.ONCE)
            G_LOGGER.extra_verbose(
                "Note: non-finite values at:\n{:}".format(bad_mask))
            G_LOGGER.extra_verbose("Note: non-finite values:\n{:}".format(
                output[bad_mask]))
            return False

        # The enabled checks, in the order they are applied: NaN check first, then finiteness.
        enabled_checks = []
        if check_nan:
            enabled_checks.append(has_no_nans)
        if check_finite:
            enabled_checks.append(has_only_finite)

        all_valid = True
        for runner_name, results in run_results:
            for result in results:
                for output_name, output in result.items():
                    G_LOGGER.info(
                        "Runner: {:40} | Validating output: {:} (check_finite={:}, check_nan={:})"
                        .format(runner_name, output_name, check_finite,
                                check_nan))

                    with G_LOGGER.indent():
                        # Every enabled check runs, even after an earlier one failed,
                        # so that each kind of problem gets logged.
                        output_valid = True
                        for check in enabled_checks:
                            output_valid &= check(output)

                        all_valid &= output_valid

                        if not output_valid:
                            G_LOGGER.error(
                                "Runner: {:40} | Errors detected in output: {:}"
                                .format(runner_name, output_name))
                            if fail_fast:
                                return False
                        else:
                            G_LOGGER.finish(
                                "Runner: {:40} | Output: {:} is valid".format(
                                    runner_name, output_name))

        if not all_valid:
            G_LOGGER.error("Validation failed")
        else:
            G_LOGGER.finish("Validation passed")
        return all_valid
Пример #5
0
    def compare_accuracy(run_results,
                         fail_fast=False,
                         comparisons=None,
                         compare_func=None):
        """
        Compares the results of pairs of runners, iteration by iteration.

        Args:
            run_results (RunResults):
                    The result of Comparator.run(). It is indexed by runner index, and each
                    entry is unpacked as a (runner_name, results) pair.
            fail_fast (bool): Whether to exit after the first failure
            comparisons (List[Tuple[int, int]]):
                    Comparisons to perform, specified by runner indexes. For example, [(0, 1), (1, 2)]
                    would compare the first runner with the second, and the second with the third.
                    Defaults to the value of Comparator.default_comparisons(run_results).
            compare_func (Callable(IterationResult, IterationResult) -> OrderedDict[str, bool]):
                    A function that takes in two IterationResults, and returns a dictionary that maps output
                    names to a boolean (or anything convertible to a boolean) indicating whether outputs matched.
                    Arguments are passed in the same order as the indexes in each tuple of `comparisons`.
                    Defaults to CompareFunc.basic_compare_func().

        Returns:
            AccuracyResult:
                    Maps each (runner0_name, runner1_name) pair to the list of dictionaries returned by
                    `compare_func`, one per compared iteration. With `fail_fast`, the result is returned
                    as soon as an iteration contains a mismatched output, so it may be incomplete.
        """
        def find_mismatched(match_dict):
            mismatched = []
            for name, matched in match_dict.items():
                if not bool(matched):
                    mismatched.append(name)
            return mismatched

        compare_func = misc.default_value(compare_func,
                                          CompareFunc.basic_compare_func())
        comparisons = misc.default_value(
            comparisons, Comparator.default_comparisons(run_results))

        accuracy_result = AccuracyResult()
        for runner0_index, runner1_index in comparisons:
            entry0, entry1 = run_results[runner0_index], run_results[runner1_index]
            (runner0_name, results0), (runner1_name, results1) = entry0, entry1

            G_LOGGER.start("Accuracy Comparison | {:} vs. {:}".format(
                runner0_name, runner1_name))
            with G_LOGGER.indent():
                runner_pair = (runner0_name, runner1_name)
                accuracy_result[runner_pair] = []

                # zip() stops at the shorter list, so this is the number of iterations compared.
                num_iters = min(len(results0), len(results1))
                multiple_iters = num_iters > 1

                for iteration, iteration_pair in enumerate(zip(results0, results1)):
                    result0, result1 = iteration_pair
                    if multiple_iters:
                        G_LOGGER.info("Iteration: {:}".format(iteration))

                    # Only indent the comparison logging when there is an iteration header above it.
                    with contextlib.ExitStack() as stack:
                        if multiple_iters:
                            stack.enter_context(G_LOGGER.indent())
                        iteration_match_dict = compare_func(result0, result1)
                        accuracy_result[runner_pair].append(
                            iteration_match_dict)

                    mismatched_outputs = find_mismatched(iteration_match_dict)
                    if fail_fast and mismatched_outputs:
                        return accuracy_result

                G_LOGGER.extra_verbose(
                    "Finished comparing {:} with {:}".format(
                        runner0_name, runner1_name))

                passed, _failed, total = accuracy_result.stats(runner_pair)
                pass_rate = accuracy_result.percentage(runner_pair) * 100.0
                if multiple_iters or len(comparisons) > 1:
                    msg = "Accuracy Summary | {:} vs. {:} | Passed: {:}/{:} iterations | Pass Rate: {:}%".format(
                        runner0_name, runner1_name, passed, total, pass_rate)
                    log_summary = G_LOGGER.finish if passed == total else G_LOGGER.error
                    log_summary(msg)
        return accuracy_result
Пример #6
0
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol):
                """
                Checks whether two outputs are element-wise close, and logs statistics about them.

                Args:
                    out0: The first output. Must expose `dtype` and `shape`.
                    out0_name (str): The name of the first output, used for logging.
                    out1: The second output. Must expose `dtype` and `shape`.
                    out1_name (str): The name of the second output, used for logging.
                    per_out_rtol: The relative tolerance passed to np.isclose.
                    per_out_atol: The absolute tolerance passed to np.isclose.

                Returns:
                    OutputCompareResult built from (matched, max_absdiff, max_reldiff), or
                    a plain False if np.isclose itself could not compare the outputs.

                Note: Runner names for logging are read from `iter_result0` and `iter_result1`
                in the enclosing scope.
                """
                def compute_max(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amax(buffer)

                # Returns index of max value
                def compute_argmax(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmax(buffer), buffer.shape)

                def compute_min(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amin(buffer)

                # Returns index of min value
                def compute_argmin(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmin(buffer), buffer.shape)

                def compute_mean(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.mean(buffer)


                def compute_required():
                    # The purpose of this function is to determine the minimum tolerances such that
                    # the outputs would be considered a match.
                    # The NumPy formula for np.isclose is absolute(out0 - out1) <= (per_out_atol + per_out_rtol * absolute(out1))
                    # So, for both absolute/relative tolerance, given either one,
                    # we can compute the required value for the other:
                    # per_out_atol = absolute(out0 - out1)
                    # atol_if_rtol = absolute(out0 - out1)  - per_out_rtol * absolute(out1)
                    # per_out_rtol = (absolute(out0 - out1) - per_out_atol) / absolute(out1)
                    if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                        # Boolean arrays are compared with XOR rather than subtraction.
                        absdiff = np.logical_xor(out0, out1)
                    else:
                        absdiff = np.abs(out0 - out1)
                    absout1 = np.abs(out1)
                    max_absdiff = max(compute_max(absdiff), 0.0)
                    required_atol_if_rtol = max(compute_max(absdiff - per_out_rtol * absout1), 0.0)
                    # Suppress divide by 0 warnings
                    with np.testing.suppress_warnings() as sup:
                        sup.filter(RuntimeWarning)
                        reldiff = np.maximum(absdiff - per_out_atol, 0.0) / absout1
                        max_reldiff = max(compute_max(reldiff), 0.0)
                    return max_absdiff, required_atol_if_rtol, max_reldiff, compute_mean(absdiff), compute_mean(reldiff)


                def log_mismatches(mismatches):
                    # Logging is best-effort: indexing with the mask can fail, and that must not
                    # abort the comparison. Only Exception is caught, so that KeyboardInterrupt
                    # and SystemExit still propagate.
                    try:
                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except Exception:
                        G_LOGGER.warning("Failing to log mismatches - this may be because the outputs are of different shapes")


                try:
                    # Compare the arrays that were passed in, rather than variables from the enclosing
                    # scope, so that the verdict always refers to the same data as the logged statistics.
                    mismatches = np.logical_not(np.isclose(out0, out1, rtol=per_out_rtol, atol=per_out_atol))
                except Exception as err:
                    G_LOGGER.warning("Failed to compare outputs with:\n{:}\nSkipping".format(err))
                    return False

                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, misc.indent_block(out0)))
                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, misc.indent_block(out1)))

                failed = np.any(mismatches)

                # The required tolerances are informational only, so a failure to compute them
                # is downgraded to a warning and does not affect the pass/fail verdict.
                try:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = compute_required()
                except Exception as err:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = None, None, None, None, None
                    G_LOGGER.warning("Could not determine required tolerances due to an error:\n{:}".format(err))
                    log_msg = ""
                else:
                    log_msg = "Required tolerances: [atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] | Mean Error: Absolute={:.5g}, Relative={:.5g}\n".format(
                                    max_absdiff, per_out_rtol, required_atol_if_rtol, max_reldiff, per_out_atol, mean_absdiff, mean_reldiff)

                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result0.runner_name, compute_mean(out0), compute_min(out0), compute_argmin(out0), compute_max(out0), compute_argmax(out0))
                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result1.runner_name, compute_mean(out1), compute_min(out1), compute_argmin(out1), compute_max(out1), compute_argmax(out1))
                G_LOGGER.info(log_msg)

                if failed:
                    log_mismatches(mismatches)
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff)
Пример #7
0
        def compare_output(iter_result0, iter_result1):
            """
            Compare the outputs of two runners from a single iteration.

            This function will always iterate over the output names of the first IterationResult,
                and attempt to find corresponding output names in the second.
            If no corresponding output name is found, the output is skipped.
            If all output names are skipped, then this function raises an error.

            Args:
                iter_result0 (IterationResult): The result of the first runner.
                iter_result1 (IterationResult): The result of the second runner.

            Returns:
                OrderedDict[str, OutputCompareResult]:
                        The name of the outputs compared, derived from the first IterationResult,
                        and whether they matched. If an output name is not found, it is omitted from this dictionary.

            Raises:
                PolygraphyException: If all output names are skipped, and thus no outputs are compared.
            """
            # Returns whether the outputs match
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol):
                def compute_max(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amax(buffer)

                # Returns index of max value
                def compute_argmax(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmax(buffer), buffer.shape)

                def compute_min(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amin(buffer)

                # Returns index of min value
                def compute_argmin(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmin(buffer), buffer.shape)

                def compute_mean(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.mean(buffer)


                def compute_required():
                    # The purpose of this function is to determine the minimum tolerances such that
                    # the outputs would be considered a match.
                    # The NumPy formula for np.isclose is absolute(out0 - out1) <= (per_out_atol + per_out_rtol * absolute(out1))
                    # So, for both absolute/relative tolerance, given either one,
                    # we can compute the required value for the other:
                    # per_out_atol = absolute(out0 - out1)
                    # atol_if_rtol = absolute(out0 - out1)  - per_out_rtol * absolute(out1)
                    # per_out_rtol = (absolute(out0 - out1) - per_out_atol) / absolute(out1)
                    if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                        absdiff = np.logical_xor(out0, out1)
                    else:
                        absdiff = np.abs(out0 - out1)
                    absout1 = np.abs(out1)
                    max_absdiff = max(compute_max(absdiff), 0.0)
                    required_atol_if_rtol = max(compute_max(absdiff - per_out_rtol * absout1), 0.0)
                    # Suppress divide by 0 warnings
                    with np.testing.suppress_warnings() as sup:
                        sup.filter(RuntimeWarning)
                        reldiff = np.maximum(absdiff - per_out_atol, 0.0) / absout1
                        max_reldiff = max(compute_max(reldiff), 0.0)
                    return max_absdiff, required_atol_if_rtol, max_reldiff, compute_mean(absdiff), compute_mean(reldiff)


                def log_mismatches(mismatches):
                    try:
                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except:
                        G_LOGGER.warning("Failing to log mismatches - this may be because the outputs are of different shapes")


                try:
                    mismatches = np.logical_not(np.isclose(output0, output1, rtol=per_out_rtol, atol=per_out_atol))
                except Exception as err:
                    G_LOGGER.warning("Failed to compare outputs with:\n{:}\nSkipping".format(err))
                    return False

                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, misc.indent_block(out0)))
                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, misc.indent_block(out1)))

                failed = np.any(mismatches)

                try:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = compute_required()
                except Exception as err:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = None, None, None, None, None
                    G_LOGGER.warning("Could not determine required tolerances due to an error:\n{:}".format(err))
                    log_msg = ""
                else:
                    log_msg = "Required tolerances: [atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] | Mean Error: Absolute={:.5g}, Relative={:.5g}\n".format(
                                    max_absdiff, per_out_rtol, required_atol_if_rtol, max_reldiff, per_out_atol, mean_absdiff, mean_reldiff)

                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result0.runner_name, compute_mean(out0), compute_min(out0), compute_argmin(out0), compute_max(out0), compute_argmax(out0))
                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result1.runner_name, compute_mean(out1), compute_min(out1), compute_argmin(out1), compute_max(out1), compute_argmax(out1))
                G_LOGGER.info(log_msg)

                if failed:
                    log_mismatches(mismatches)
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff)


            output_status = OrderedDict() # OrderedDict[str, bool] Maps output names to whether they matched.

            if not check_shapes:
                G_LOGGER.info("Strict shape checking disabled. Will attempt to match output shapes before comparisons")


            def default_find_output_func(output_name, index, iter_result):
                found_name = misc.find_in_dict(output_name, iter_result, index)
                if found_name is None:
                    return None
                elif found_name != output_name:
                    exact_match = misc.find_in_dict(found_name, iter_result0)
                    if exact_match == found_name:
                        G_LOGGER.verbose("Will not compare {:} with {:}, since the former already has an exact match: {:}".format(
                                            found_name, output_name, exact_match))
                        return None # If the found output is being compared against another output already, skip this non-exact match
                    G_LOGGER.warning("Output names did not match exactly. Assuming {:} output: {:} "
                                    "corresponds to output: {:}".format(
                                        iter_result.runner_name, found_name, output_name))
                return [found_name]


            nonlocal find_output_func
            find_output_func = misc.default_value(find_output_func, default_find_output_func)

            for index, (out0_name, output0) in enumerate(iter_result0.items()):
                out1_names = misc.default_value(find_output_func(out0_name, index, iter_result1), [])

                if len(out1_names) > 1:
                    G_LOGGER.info("Will attempt to compare output: '{:}' [{:}] with multiple outputs: '{:}' [{:}]".format(
                                    out0_name, iter_result0.runner_name, list(out1_names), iter_result1.runner_name))

                for out1_name in out1_names:
                    if out1_name is None or out1_name not in iter_result1:
                        G_LOGGER.warning("For output: '{:}' [{:}], skipping corresponding output: '{:}' [{:}], "
                                         "since the output was not found".format(out0_name, iter_result0.runner_name,
                                                                                 out1_name, iter_result1.runner_name))
                        continue

                    output1 = iter_result1[out1_name]
                    G_LOGGER.start("Comparing Output: '{:}' (dtype={:}, shape={:}) with '{:}' (dtype={:}, shape={:})".format(
                                        out0_name, output0.dtype, output0.shape, out1_name, output1.dtype, output1.shape))
                    G_LOGGER.extra_verbose("Note: Comparing {:} vs. {:}".format(iter_result0.runner_name, iter_result1.runner_name))


                    def get_tol(tol_dict):
                        if isinstance(tol_dict, numbers.Number):
                            return tol_dict

                        if out0_name in tol_dict:
                            return tol_dict[out0_name]
                        elif "" in tol_dict:
                            return tol_dict[""]

                        G_LOGGER.critical("Could not find a tolerance for output: '{:}' in the provided tolerance map: {:}.\n"
                                          "Note: Use a key of `""` in the map to specify a default tolerance.".format(out0_name, tol_dict))


                    with G_LOGGER.indent():
                        if check_shapes and output0.shape != output1.shape:
                            G_LOGGER.error("Will not compare outputs of different shapes. Note: Output shapes are "
                                           "{:} and {:}.".format(output0.shape, output1.shape))
                            G_LOGGER.error("Note: Use --no-strict-shape-checking or set check_shapes=False to "
                                           "attempt to compare values anyway.", mode=LogMode.ONCE)
                            outputs_match = False
                        else:
                            output1 = misc.try_match_shape(output1, output0.shape)
                            output0 = output0.reshape(output1.shape)
                            outputs_match = check_outputs_match(output0, out0_name, output1, out1_name,
                                                                per_out_rtol=get_tol(rtol), per_out_atol=get_tol(atol))

                        output_status[out0_name] = outputs_match
                        if fail_fast and not outputs_match:
                            return output_status


            mismatched_output_names = [name for name, matched in output_status.items() if not matched]
            if mismatched_output_names:
                G_LOGGER.error("FAILED | Mismatched outputs: {:}".format(mismatched_output_names))

            # This is useful for catching cases were Polygraphy does something wrong with the runner output buffers
            if not output_status and (bool(iter_result0.keys()) or bool(iter_result1.keys())):
                r0_name = iter_result0.runner_name
                r0_outs = list(iter_result0.keys())
                r1_name = iter_result1.runner_name
                r1_outs = list(iter_result1.keys())
                G_LOGGER.critical("All outputs were skipped, no common outputs found! Note:\n{:} outputs: "
                                  "{:}\n{:} outputs: {:}".format(r0_name, r0_outs, r1_name, r1_outs))

            return output_status
Пример #8
0
def check_onnx_parser_errors(parser):
    """
    Logs every error reported by ``parser``, then reports a critical failure.

    Does nothing when the parser reports no errors.

    Args:
        parser: An object exposing ``num_errors`` and ``get_error(index)``.
    """
    error_count = parser.num_errors
    if error_count <= 0:
        return

    for message in map(parser.get_error, range(error_count)):
        G_LOGGER.error(message)
    G_LOGGER.critical("Could not parse ONNX correctly")