def handle_audit_verification(audit_test_name, config):
    # Decouples the verification step from any auditing runs for better maintenance and testing
    logging.info('AUDIT HARNESS: Running verification script...')

    # Prepare log_dir
    config['log_dir'] = os.path.join('build/compliance_logs', audit_test_name)

    # Get a harness object
    harness, config = _generate_harness_object(config=config, profile=None)

    result = None
    if audit_test_name == 'TEST01':
        result = auditing.verify_test01(harness)
        if result == 'TEST01 FALLBACK':
            # Signals a fallback for a failed test.
            # Process description:
            #   1. Generate the baseline_accuracy file.
            #   2. Calculate the accuracy of the baseline, using the benchmark's accuracy script.
            #   3. Use the same script to calculate the accuracy of the compliance run.
            #   4. Depending on the accuracy level, declare success if the two values are within the defined tolerance.
            logging.info('main.py notified for fallback handling on TEST01')

            # Run the compliance script to generate the baseline file
            full_log_dir = harness.get_full_log_dir()
            results_path = os.path.join('results', harness.get_system_name(),
                                        harness._get_submission_benchmark_name(), harness.scenario)
            harness_accuracy_log = os.path.join(results_path, 'accuracy/mlperf_log_accuracy.json')
            compliance_accuracy_log = os.path.join(full_log_dir, 'mlperf_log_accuracy.json')
            fallback_command = 'bash build/inference/compliance/nvidia/TEST01/create_accuracy_baseline.sh {} {}'.format(
                harness_accuracy_log, compliance_accuracy_log)
            # Generates a new file called mlperf_log_accuracy_baseline.json
            run_command(fallback_command, get_output=True)

            def move_file(src, dst):
                logging.info('Moving file: {} --> {}'.format(src, dst))
                shutil.move(src, dst)

            def copy_file(src, dst):
                logging.info('Copying file: {} --> {}'.format(src, dst))
                shutil.copy(src, dst)

            # Create accuracy and performance directories
            accuracy_dir = os.path.join(full_log_dir, 'TEST01', 'accuracy')
            performance_dir = os.path.join(full_log_dir, 'TEST01', 'performance', 'run_1')
            os.makedirs(accuracy_dir, exist_ok=True)
            os.makedirs(performance_dir, exist_ok=True)

            # Get the accuracy of the baseline file
            fallback_result_baseline = check_accuracy('mlperf_log_accuracy_baseline.json', config, is_compliance=True)
            # Move it to the submission dir
            dest_path = os.path.join(accuracy_dir, 'baseline_accuracy.txt')
            move_file('accuracy.txt', dest_path)

            # Get the accuracy of the compliance file
            fallback_result_compliance = check_accuracy('{}/mlperf_log_accuracy.json'.format(full_log_dir),
                                                        config, is_compliance=True)
            # Move it to the submission dir - check_accuracy stores accuracy.txt in the directory
            # named by its first argument, so this file is already located inside get_full_log_dir().
            src_path = os.path.join(full_log_dir, 'accuracy.txt')
            dest_path = os.path.join(accuracy_dir, 'compliance_accuracy.txt')
            move_file(src_path, dest_path)

            # Move the required logs to their correct locations since run_verification.py has failed.
            move_file('verify_accuracy.txt', os.path.join(full_log_dir, 'TEST01', 'verify_accuracy.txt'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_accuracy.json'),
                      os.path.join(accuracy_dir, 'mlperf_log_accuracy.json'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_detail.txt'),
                      os.path.join(performance_dir, 'mlperf_log_detail.txt'))
            copy_file(os.path.join(full_log_dir, 'mlperf_log_summary.txt'),
                      os.path.join(performance_dir, 'mlperf_log_summary.txt'))

            # Need to run the verify_performance.py script to get the verify_performance.txt file.
            verify_performance_command = ("python3 build/inference/compliance/nvidia/TEST01/verify_performance.py -r "
                                          + results_path + "/performance/run_1/mlperf_log_summary.txt"
                                          + " -t " + performance_dir + "/mlperf_log_summary.txt | tee "
                                          + full_log_dir + "/TEST01/verify_performance.txt")
            run_command(verify_performance_command, get_output=True)

            # Check the accuracy level - this test's tolerance depends on it
            accuracy_level = config["accuracy_level"][:-1]
            if accuracy_level == '99.9':
                logging.info('High Accuracy benchmark detected. Tolerance set to 0.1%')
                if not math.isclose(fallback_result_baseline, fallback_result_compliance, rel_tol=0.001):
                    raise ValueError('TEST01 + Fallback failure: BASELINE ACCURACY: {}, COMPLIANCE_ACCURACY: {}'.format(
                        fallback_result_baseline, fallback_result_compliance))
                else:
                    logging.info('AUDIT HARNESS: Success: TEST01 failure redeemed via fallback approach.')
                    print('TEST PASS')
            elif accuracy_level == '99':
                logging.info('Low Accuracy benchmark detected. Tolerance set to 1%')
                if not math.isclose(fallback_result_baseline, fallback_result_compliance, rel_tol=0.01):
                    raise ValueError('TEST01 + Fallback failure: BASELINE ACCURACY: {}, COMPLIANCE_ACCURACY: {}'.format(
                        fallback_result_baseline, fallback_result_compliance))
                else:
                    logging.info('AUDIT HARNESS: Success: TEST01 failure redeemed via fallback approach.')
                    print('TEST PASS')
            else:
                raise ValueError('Accuracy level not supported: {}'.format(accuracy_level))
    elif audit_test_name == 'TEST04-A' or audit_test_name == 'TEST04-B':
        exclude_list = [BENCHMARKS.BERT, BENCHMARKS.DLRM, BENCHMARKS.RNNT]
        if BENCHMARKS.alias(config['benchmark']) in exclude_list:
            logging.info('TEST04 is not supported for benchmark {}. Ignoring request...'.format(config['benchmark']))
            return None
        result = auditing.verify_test04(harness)
    elif audit_test_name == 'TEST05':
        result = auditing.verify_test05(harness)
    return result
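

# Hypothetical usage sketch (not part of the original flow): shows how
# handle_audit_verification() might be driven for several compliance tests.
# It assumes `config` was built by the surrounding main.py machinery and that the
# audit runs themselves have already been executed; run_audit_verifications() and
# the list of test names below are illustrative only.
def run_audit_verifications(config):
    results = {}
    for audit_test_name in ['TEST01', 'TEST04-A', 'TEST04-B', 'TEST05']:
        # Pass a shallow copy, since handle_audit_verification overwrites config['log_dir'].
        result = handle_audit_verification(audit_test_name, dict(config))
        if result is None:
            logging.info('AUDIT HARNESS: {} skipped or not applicable.'.format(audit_test_name))
        results[audit_test_name] = result
    return results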