Example #1
def handle_audit_verification(audit_test_name, config):
    # Decouples the verification step from any auditing runs for better maintenance and testing
    logging.info('AUDIT HARNESS: Running verification script...')
    # Prepare log_dir
    config['log_dir'] = os.path.join('build/compliance_logs', audit_test_name)
    # Get a harness object
    harness, config = _generate_harness_object(config=config, profile=None)

    result = None
    if audit_test_name == 'TEST01':
        result = auditing.verify_test01(harness)
    elif audit_test_name == 'TEST04-A' or audit_test_name == 'TEST04-B':
        exclude_list = [BENCHMARKS.BERT, BENCHMARKS.DLRM, BENCHMARKS.RNNT]
        if BENCHMARKS.alias(config['benchmark']) in exclude_list:
            logging.info(
                'TEST04 is not supported for benchmark {}. Ignoring request...'
                .format(config['benchmark']))
            return None
        result = auditing.verify_test04(harness)
    elif audit_test_name == 'TEST05':
        result = auditing.verify_test05(harness)
    return result
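
The if/elif chain above is essentially a dispatch on the audit test name. Below is a minimal, self-contained sketch of that pattern with stub verifiers; the stub functions and their return strings are placeholders, not the real auditing module.

# Stand-in verifiers; the real implementations live in the project's auditing module.
def verify_test01_stub(harness):
    return 'TEST01 result'

def verify_test05_stub(harness):
    return 'TEST05 result'

VERIFIERS = {
    'TEST01': verify_test01_stub,
    'TEST05': verify_test05_stub,
}

def dispatch(audit_test_name, harness):
    # Unsupported test names fall through and return None, as in the function above.
    verifier = VERIFIERS.get(audit_test_name)
    return verifier(harness) if verifier else None

print(dispatch('TEST01', harness=None))  # TEST01 result
print(dispatch('TEST99', harness=None))  # None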
Example #2
def handle_audit_verification(audit_test_name, config):
    # Decouples the verification step from any auditing runs for better maintenance and testing
    logging.info('AUDIT HARNESS: Running verification script...')
    # Prepare log_dir
    config['log_dir'] = os.path.join('build/compliance_logs', audit_test_name)
    # Get a harness object
    harness, config = _generate_harness_object(config=config, profile=None)

    result = None
    if audit_test_name == 'TEST01':
        result = auditing.verify_test01(harness)
        if result == 'TEST01 FALLBACK':
            # Signals a fallback for failed test
            # Process description:
            #   1. Generate baseline_accuracy file
            #   2. Calculate the accuracy of baseline, using the benchmark's accuracy script
            #   3. Use same script to calculate accuracy of compliance run
            #   4. Depending on the accuracy level, declare success if the two values are within the defined tolerance.
            logging.info('main.py notified for fallback handling on TEST01')

            # Run compliance script to generate baseline file
            full_log_dir = harness.get_full_log_dir()
            results_path = os.path.join('results', harness.get_system_name(), harness._get_submission_benchmark_name(), harness.scenario)
            harness_accuracy_log = os.path.join(results_path, 'accuracy/mlperf_log_accuracy.json')
            compliance_accuracy_log = os.path.join(full_log_dir, 'mlperf_log_accuracy.json')
            fallback_command = 'bash build/inference/compliance/nvidia/TEST01/create_accuracy_baseline.sh {} {}'.format(
                harness_accuracy_log,
                compliance_accuracy_log
            )
            # Generates a new file called mlperf_log_accuracy_baseline.json
            run_command(fallback_command, get_output=True)

            def move_file(src, dst):
                logging.info('Moving file: {} --> {}'.format(src, dst))
                shutil.move(src, dst)

            def copy_file(src, dst):
                logging.info('Copying file: {} --> {}'.format(src, dst))
                shutil.copy(src, dst)

            # Create accuracy and performance directories
            accuracy_dir = os.path.join(full_log_dir, 'TEST01', 'accuracy')
            performance_dir = os.path.join(full_log_dir, 'TEST01', 'performance', 'run_1')
            os.makedirs(accuracy_dir, exist_ok=True)
            os.makedirs(performance_dir, exist_ok=True)

            # Get the accuracy of baseline file
            fallback_result_baseline = check_accuracy('mlperf_log_accuracy_baseline.json', config, is_compliance=True)
            # Move it to the submission dir
            dest_path = os.path.join(accuracy_dir, 'baseline_accuracy.txt')
            move_file('accuracy.txt', dest_path)

            # Get the accuracy of compliance file
            fallback_result_compliance = check_accuracy('{}/mlperf_log_accuracy.json'.format(full_log_dir), config, is_compliance=True)
            # Move it to the submission dir. check_accuracy stores accuracy.txt in the directory
            # named in its first argument, so this file is already located inside get_full_log_dir().
            src_path = os.path.join(full_log_dir, 'accuracy.txt')
            dest_path = os.path.join(accuracy_dir, 'compliance_accuracy.txt')
            move_file(src_path, dest_path)

            # Move the required logs to their correct locations since run_verification.py has failed.
            move_file('verify_accuracy.txt', os.path.join(full_log_dir, 'TEST01', 'verify_accuracy.txt'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_accuracy.json'), os.path.join(accuracy_dir, 'mlperf_log_accuracy.json'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_detail.txt'), os.path.join(performance_dir, 'mlperf_log_detail.txt'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_summary.txt'), os.path.join(performance_dir, 'mlperf_log_summary.txt'))

            # Need to run verify_performance.py script to get verify_performance.txt file.
            verify_performance_command = ("python3 build/inference/compliance/nvidia/TEST01/verify_performance.py -r "
                + results_path + "/performance/run_1/mlperf_log_summary.txt" + " -t "
                + performance_dir + "/mlperf_log_summary.txt | tee " + full_log_dir + "/TEST01/verify_performance.txt")
            run_command(verify_performance_command, get_output=True)

            # Check level of accuracy - this test's tolerance depends on it
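            # Assumption: config['accuracy_level'] carries a trailing '%' (e.g. '99.9%'), which the slice below strips.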
            accuracy_level = config["accuracy_level"][:-1]
            if accuracy_level == '99.9':
                logging.info('High Accuracy benchmark detected. Tolerance set to 0.1%')
                if not math.isclose(fallback_result_baseline, fallback_result_compliance, rel_tol=0.001):
                    raise ValueError('TEST01 + Fallback failure: BASELINE ACCURACY: {}, COMPLIANCE_ACCURACY: {}'.format(fallback_result_baseline, fallback_result_compliance))
                else:
                    logging.info('AUDIT HARNESS: Success: TEST01 failure redeemed via fallback approach.')
                    print('TEST PASS')
            elif accuracy_level == '99':
                logging.info('Low Accuracy benchmark detected. Tolerance set to 1%')
                if not math.isclose(fallback_result_baseline, fallback_result_compliance, rel_tol=0.01):
                    raise ValueError('TEST01 + Fallback failure: BASELINE ACCURACY: {}, COMPLIANCE_ACCURACY: {}'.format(fallback_result_baseline, fallback_result_compliance))
                else:
                    logging.info('AUDIT HARNESS: Success: TEST01 failure redeemed via fallback approach.')
                    print('TEST PASS')
            else:
                raise ValueError('Accuracy level not supported: {}'.format(accuracy_level))
    elif audit_test_name == 'TEST04-A' or audit_test_name == 'TEST04-B':
        exclude_list = [BENCHMARKS.BERT, BENCHMARKS.DLRM, BENCHMARKS.RNNT]
        if BENCHMARKS.alias(config['benchmark']) in exclude_list:
            logging.info('TEST04 is not supported for benchmark {}. Ignoring request...'.format(config['benchmark']))
            return None
        result = auditing.verify_test04(harness)
    elif audit_test_name == 'TEST05':
        result = auditing.verify_test05(harness)
    return result
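
For reference, the fallback's final pass/fail decision reduces to a relative-tolerance comparison with math.isclose. Here is a minimal, runnable sketch with hypothetical accuracy values; the 0.1% and 1% tolerances mirror the accuracy_level branch in Example #2.

import math

def within_tolerance(baseline_accuracy, compliance_accuracy, accuracy_level):
    # 99.9% (high-accuracy) targets use a 0.1% relative tolerance, 99% targets use 1%,
    # mirroring the accuracy_level branch above.
    tolerances = {'99.9': 0.001, '99': 0.01}
    if accuracy_level not in tolerances:
        raise ValueError('Accuracy level not supported: {}'.format(accuracy_level))
    return math.isclose(baseline_accuracy, compliance_accuracy, rel_tol=tolerances[accuracy_level])

# Hypothetical accuracy values, for illustration only.
print(within_tolerance(90.874, 90.500, '99.9'))  # False: ~0.4% apart, exceeds 0.1%
print(within_tolerance(90.874, 90.500, '99'))    # True: within 1%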