def test_protobuf_read(self):
  output_dir = '/tmp/'
  os.environ['TEST_REPORT_FILE_PREFIX'] = output_dir
  benchmark_result_file_path = os.path.join(
      output_dir, 'TestUtils.testReportBenchmark')
  if os.path.exists(benchmark_result_file_path):
    os.remove(benchmark_result_file_path)

  self.report_benchmark(
      iters=2000,
      wall_time=1000,
      name='testReportBenchmark',
      metrics=[{
          'name': 'metric_name_1',
          'value': 0,
          'min_value': 1
      }, {
          'name': 'metric_name_2',
          'value': 90,
          'min_value': 0,
          'max_value': 95
      }])

  actual_result = utils.read_benchmark_result(benchmark_result_file_path)
  os.remove(benchmark_result_file_path)

  expected_result = {
      'name': 'TestUtils.testReportBenchmark',
      # google.protobuf.json_format.MessageToDict() will convert
      # int64 field to string.
      'iters': '2000',
      'wall_time': 1000,
      'cpu_time': 0,
      'throughput': 0,
      'extras': {},
      'metrics': [{
          'name': 'metric_name_1',
          'value': 0,
          'min_value': 1
      }, {
          'name': 'metric_name_2',
          'value': 90,
          'min_value': 0,
          'max_value': 95
      }]
  }
  self.assertDictEqual(expected_result, actual_result)
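# The helper exercised above is not shown in this section. Below is a minimal
# sketch of what utils.read_benchmark_result() could look like, assuming the
# file written by tf.test.Benchmark.report_benchmark() holds a serialized
# BenchmarkEntries proto (tensorflow/core/util/test_log.proto). The
# MessageToDict() call is what converts the int64 'iters' field to a string,
# and including_default_value_fields is assumed here so that unset fields such
# as 'cpu_time' show up as 0 in the returned dict.
from google.protobuf import json_format
from tensorflow.core.util import test_log_pb2


def read_benchmark_result_sketch(benchmark_result_file_path):
  """Parses one benchmark entry from a serialized BenchmarkEntries file."""
  with open(benchmark_result_file_path, 'rb') as f:
    benchmark_entries = test_log_pb2.BenchmarkEntries()
    benchmark_entries.ParseFromString(f.read())
  # preserving_proto_field_name keeps snake_case keys such as 'wall_time'.
  result = json_format.MessageToDict(
      benchmark_entries,
      preserving_proto_field_name=True,
      including_default_value_fields=True)
  return result['entry'][0]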
def run_benchmark(self):
  """Run benchmark."""
  for benchmark_method in self._get_benchmark_methods():
    try:
      execution_id = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
      output_dir = os.path.join(self.output_root_dir, execution_id)
      utils.make_dir_if_not_exist(output_dir)

      # Setup per-method file logger
      filehandler = logging.FileHandler(
          filename=os.path.join(output_dir, 'perfzero.log'), mode='w')
      filehandler.setFormatter(
          logging.Formatter('%(asctime)s %(levelname)s: %(message)s'))
      logging.getLogger().addHandler(filehandler)

      class_instance = self._instantiate_benchmark_class(output_dir)
      benchmark_name = '{}.{}'.format(class_instance.__class__.__name__,
                                      benchmark_method)

      # tf.test.Benchmark.report_benchmark() will write benchmark results to
      # the file whose path is benchmark_result_file_path_prefix +
      # benchmark_name
      benchmark_result_file_path_prefix = os.path.join(output_dir, 'proto_')
      os.environ['TEST_REPORT_FILE_PREFIX'] = benchmark_result_file_path_prefix
      benchmark_result_file_path = (
          benchmark_result_file_path_prefix + benchmark_name)

      # Run benchmark method
      logging.info('Start benchmark: %s', benchmark_name)
      getattr(class_instance, benchmark_method)()
      logging.info('End benchmark: %s', benchmark_name)

      # Read and upload benchmark results
      benchmark_result = utils.read_benchmark_result(
          benchmark_result_file_path)
      self._upload_execution_summary(benchmark_result, execution_id,
                                     output_dir)
    finally:
      logging.getLogger().removeHandler(filehandler)
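# A minimal sketch, assuming utils.make_dir_if_not_exist() used above is a
# thin wrapper around os.makedirs() that tolerates an already existing
# directory; the actual helper is not shown in this section.
import os


def make_dir_if_not_exist_sketch(path):
  """Creates the directory tree for path if it does not already exist."""
  if not os.path.exists(path):
    os.makedirs(path)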
def _run_internal(benchmark_method, harness_info, site_package_info,
                  root_output_dir, config, queue):
  """Run benchmark method and put result to the queue.

  Args:
    benchmark_method: Canonical path to the benchmark method
    harness_info: Description of the benchmark harness used in the benchmark
    site_package_info: Description of the site-package used in the benchmark
    root_output_dir: Directory under which to put the benchmark output
    config: An instance of perfzero_config
    queue: An interprocess queue to transfer benchmark result to the caller
  """
  start_timestamp = time.time()
  execution_timestamp = start_timestamp
  method_has_exception = False
  execution_id = (config.execution_id if config.execution_id else
                  datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f'))
  output_dir = os.path.join(root_output_dir, execution_id)
  if config.scratch_gcs_url:
    model_output_dir = os.path.join(config.scratch_gcs_url, execution_id)
  else:
    model_output_dir = output_dir
  utils.make_dir_if_not_exist(output_dir)
  benchmark_class, benchmark_method_name = benchmark_method.rsplit('.', 1)
  benchmark_class_name = benchmark_class.rsplit('.', 1)[1]

  tensorflow_profiler = TensorFlowProfiler(config.profiler_enabled_time_str,
                                           output_dir)
  process_info_tracker = ProcessInfoTracker(output_dir)
  process_info = None

  # Setup per-method file logger
  filehandler = logging.FileHandler(
      filename=os.path.join(output_dir, 'perfzero.log'), mode='w')
  filehandler.setFormatter(
      logging.Formatter('%(asctime)s %(levelname)s: %(message)s'))
  logging.getLogger().addHandler(filehandler)

  try:
    if config.tpu_parameters:
      tpu = config.tpu_parameters.get('name')
    else:
      tpu = None
    if config.perfzero_constructor_args:
      constructor_args = json.loads(config.perfzero_constructor_args)
    else:
      constructor_args = {}
    class_instance = utils.instantiate_benchmark_class(
        benchmark_class=benchmark_class,
        output_dir=model_output_dir,
        root_data_dir=config.root_data_dir,
        tpu=tpu,
        constructor_args=constructor_args)

    # tf.test.Benchmark.report_benchmark() writes results to a file with
    # path benchmark_result_file_path_prefix + benchmark_method
    benchmark_result_file_path_prefix = os.path.join(output_dir, 'proto_')
    os.environ['TEST_REPORT_FILE_PREFIX'] = benchmark_result_file_path_prefix
    benchmark_result_file_path = '{}{}.{}'.format(
        benchmark_result_file_path_prefix, benchmark_class_name,
        benchmark_method_name)

    # Start background threads for profiler and system info tracker
    tensorflow_profiler.start()
    process_info_tracker.start()

    # Run benchmark method
    execution_timestamp = time.time()
    logging.info('Starting benchmark execution: %s', benchmark_method)
    getattr(class_instance, benchmark_method_name)()
    logging.info('Stopped benchmark: %s', benchmark_method)

    # Read and build benchmark results
    raw_benchmark_result = utils.read_benchmark_result(
        benchmark_result_file_path)
    # Explicitly overwrite the name to be the full path to benchmark method
    raw_benchmark_result['name'] = benchmark_method
  except Exception:  # pylint: disable=broad-except
    logging.error('Benchmark execution for %s failed due to error:\n %s',
                  benchmark_method, traceback.format_exc())
    method_has_exception = True
    raw_benchmark_result = {}
    raw_benchmark_result['name'] = benchmark_method
    raw_benchmark_result['wall_time'] = -1
    raw_benchmark_result['extras'] = {}
  finally:
    # Stop background threads for profiler and system info tracker
    process_info = process_info_tracker.stop()
    tensorflow_profiler.stop()

  upload_timestamp = time.time()
  benchmark_result = report_utils.build_benchmark_result(
      raw_benchmark_result, method_has_exception)
  execution_summary = report_utils.build_execution_summary(
      execution_timestamp, execution_id, config.ml_framework_build_label,
      config.execution_label, config.platform_name, config.system_name,
      config.output_gcs_url, benchmark_result, config.get_env_vars(),
      config.get_flags(), harness_info, site_package_info, process_info,
      method_has_exception)
  report_utils.upload_execution_summary(config.bigquery_project_name,
                                        config.bigquery_dataset_table_name,
                                        execution_summary)
  report_utils.execute_methods(config.result_upload_methods,
                               execution_summary)
  logging.info('Benchmark execution for %s completed with summary:\n %s',
               benchmark_method, json.dumps(execution_summary, indent=2))
  _set_file_contents(json.dumps(execution_summary, indent=2),
                     os.path.join(output_dir, 'perfzero_summary.json'))
  utils.maybe_upload_to_gcs(output_dir, config.output_gcs_url)
  logging.getLogger().removeHandler(filehandler)

  method_execution_time = {
      'class_initialization': execution_timestamp - start_timestamp,
      'method_execution': upload_timestamp - execution_timestamp,
      'log_upload': time.time() - upload_timestamp
  }

  if config.profiler_enabled_time_str:
    relative_output_dir = output_dir[output_dir.find('benchmark'):]
    print('\nExecute the command below to start tensorboard server using '
          'the collected profiler data:\ntensorboard --logdir={}\n\n'
          'Open localhost:6006 in your browser to access the TensorBoard '
          'GUI. Use ssh with port forwarding if tensorboard is running on '
          'a remote machine.\n'.format(relative_output_dir))

  queue.put((method_has_exception, method_execution_time,
             benchmark_result['succeeded'], output_dir))
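# A hypothetical caller sketch (not part of the original harness) showing how
# _run_internal() could be driven in a child process so that a crash in one
# benchmark method does not bring down the whole run. The unpacked tuple
# mirrors the queue.put() call at the end of _run_internal() above.
import multiprocessing


def run_in_child_process_sketch(benchmark_method, harness_info,
                                site_package_info, root_output_dir, config):
  """Runs one benchmark method in a subprocess and returns its result tuple."""
  queue = multiprocessing.Queue()
  process = multiprocessing.Process(
      target=_run_internal,
      args=(benchmark_method, harness_info, site_package_info,
            root_output_dir, config, queue))
  process.start()
  # queue.get() blocks until _run_internal() publishes its result; reading
  # before join() avoids a potential deadlock on large queue payloads.
  method_has_exception, method_execution_time, succeeded, output_dir = (
      queue.get())
  process.join()
  return method_has_exception, method_execution_time, succeeded, output_dir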
def run_benchmark(self):
  """Run benchmark."""
  site_package_info = self._setup()
  has_exception = False
  benchmark_success_results = {}
  benchmark_output_dirs = {}
  for benchmark_method in self._get_benchmark_methods():
    start_timestamp = time.time()
    execution_timestamp = start_timestamp
    method_has_exception = False
    execution_id = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    output_dir = os.path.join(self.root_output_dir, execution_id)
    utils.make_dir_if_not_exist(output_dir)
    benchmark_output_dirs[benchmark_method] = output_dir
    benchmark_class, benchmark_method_name = benchmark_method.rsplit('.', 1)
    benchmark_class_name = benchmark_class.rsplit('.', 1)[1]

    tensorflow_profiler = TensorFlowProfiler(
        self.config.profiler_enabled_time_str, output_dir)
    process_info_tracker = ProcessInfoTracker(output_dir)
    process_info = None

    # Setup per-method file logger
    filehandler = logging.FileHandler(
        filename=os.path.join(output_dir, 'perfzero.log'), mode='w')
    filehandler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s: %(message)s'))
    logging.getLogger().addHandler(filehandler)

    try:
      class_instance = self._instantiate_benchmark_class(
          benchmark_class, output_dir, self.config.root_data_dir)

      # tf.test.Benchmark.report_benchmark() writes results to a file with
      # path benchmark_result_file_path_prefix + benchmark_method
      benchmark_result_file_path_prefix = os.path.join(output_dir, 'proto_')
      os.environ['TEST_REPORT_FILE_PREFIX'] = benchmark_result_file_path_prefix  # pylint: disable=line-too-long
      benchmark_result_file_path = '{}{}.{}'.format(
          benchmark_result_file_path_prefix, benchmark_class_name,
          benchmark_method_name)

      # Start background threads for profiler and system info tracker
      tensorflow_profiler.start()
      process_info_tracker.start()

      # Run benchmark method
      execution_timestamp = time.time()
      logging.info('Starting benchmark execution: %s', benchmark_method)
      getattr(class_instance, benchmark_method_name)()
      logging.info('Stopped benchmark: %s', benchmark_method)

      # Read and build benchmark results
      raw_benchmark_result = utils.read_benchmark_result(
          benchmark_result_file_path)
      # Explicitly overwrite the name to be the full path to benchmark method
      raw_benchmark_result['name'] = benchmark_method
    except Exception:  # pylint: disable=broad-except
      logging.error('Benchmark execution for %s failed due to error:\n %s',
                    benchmark_method, traceback.format_exc())
      method_has_exception = True
      has_exception = True
      raw_benchmark_result = {}
      raw_benchmark_result['name'] = benchmark_method
      raw_benchmark_result['wall_time'] = -1
      raw_benchmark_result['extras'] = {}
    finally:
      # Stop background threads for profiler and system info tracker
      process_info = process_info_tracker.stop()
      tensorflow_profiler.stop()

    upload_timestamp = time.time()
    benchmark_result = report_utils.build_benchmark_result(
        raw_benchmark_result, method_has_exception)
    benchmark_success_results[benchmark_method] = benchmark_result['succeeded']  # pylint: disable=line-too-long

    execution_summary = report_utils.build_execution_summary(
        execution_timestamp, execution_id,
        self.config.ml_framework_build_label, self.config.execution_label,
        self.config.platform_name, self.config.system_name,
        self.config.output_gcs_url, benchmark_result,
        self.config.get_env_vars(), self.config.get_flags(),
        site_package_info, process_info, method_has_exception)
    report_utils.upload_execution_summary(
        self.config.bigquery_project_name,
        self.config.bigquery_dataset_table_name, execution_summary)
    logging.info('Benchmark execution for %s completed with summary:\n %s',
                 benchmark_method, json.dumps(execution_summary, indent=2))
    utils.maybe_upload_to_gcs(output_dir, self.config.output_gcs_url)
    logging.getLogger().removeHandler(filehandler)

    self.benchmark_execution_time[benchmark_method] = {
        'class_initialization': execution_timestamp - start_timestamp,
        'method_execution': upload_timestamp - execution_timestamp,
        'log_upload': time.time() - upload_timestamp
    }

    if self.config.profiler_enabled_time_str:
      relative_output_dir = output_dir[output_dir.find('benchmark'):]
      print('\nExecute the command below to start tensorboard server using '
            'the collected profiler data:\ntensorboard --logdir={}\n\n'
            'Open localhost:6006 in your browser to access the TensorBoard '
            'GUI. Use ssh with port forwarding if tensorboard is running on '
            'a remote machine.\n'.format(relative_output_dir))

  print('Benchmark execution time in seconds by operation:\n {}'.format(
      json.dumps(self.benchmark_execution_time, indent=2)))
  print('Benchmark success results:\n{}'.format(
      json.dumps(benchmark_success_results, indent=2)))
  print('Benchmark local output directories:\n{}'.format(
      json.dumps(benchmark_output_dirs, indent=2)))

  if has_exception:
    sys.exit(1)
def run_benchmark(self):
  """Run benchmark."""
  site_package_info = self._setup()
  has_exception = False
  benchmark_success_results = {}
  for benchmark_method in self._get_benchmark_methods():
    start_timestamp = time.time()
    method_has_exception = False
    execution_id = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    execution_timestamp = time.time()
    output_dir = os.path.join(self.root_output_dir, execution_id)
    utils.make_dir_if_not_exist(output_dir)
    benchmark_class, benchmark_method_name = benchmark_method.rsplit('.', 1)
    benchmark_class_name = benchmark_class.rsplit('.', 1)[1]

    # Setup per-method file logger
    filehandler = logging.FileHandler(
        filename=os.path.join(output_dir, 'perfzero.log'), mode='w')
    filehandler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s: %(message)s'))
    logging.getLogger().addHandler(filehandler)

    try:
      class_instance = self._instantiate_benchmark_class(
          benchmark_class, output_dir)

      # tf.test.Benchmark.report_benchmark() writes results to a file with
      # path benchmark_result_file_path_prefix + benchmark_method
      benchmark_result_file_path_prefix = os.path.join(output_dir, 'proto_')
      os.environ['TEST_REPORT_FILE_PREFIX'] = benchmark_result_file_path_prefix  # pylint: disable=line-too-long
      benchmark_result_file_path = '{}{}.{}'.format(
          benchmark_result_file_path_prefix, benchmark_class_name,
          benchmark_method_name)

      # Run benchmark method
      logging.info('Start benchmark: %s', benchmark_method)
      getattr(class_instance, benchmark_method_name)()
      logging.info('End benchmark: %s', benchmark_method)

      # Read and build benchmark results
      raw_benchmark_result = utils.read_benchmark_result(benchmark_result_file_path)  # pylint: disable=line-too-long
      # Explicitly overwrite the name to be the full path to benchmark method
      raw_benchmark_result['name'] = benchmark_method
    except Exception:  # pylint: disable=W0703
      logging.error('Benchmark execution for %s failed due to error:\n %s',
                    benchmark_method, traceback.format_exc())
      method_has_exception = True
      has_exception = True
      raw_benchmark_result = {}
      raw_benchmark_result['name'] = benchmark_method
      raw_benchmark_result['wall_time'] = -1
      raw_benchmark_result['extras'] = {}

    upload_timestamp = time.time()
    benchmark_result = report_utils.build_benchmark_result(
        raw_benchmark_result, method_has_exception)
    benchmark_success_results[benchmark_method] = benchmark_result['succeeded']  # pylint: disable=line-too-long

    execution_summary = report_utils.build_execution_summary(
        execution_timestamp, execution_id,
        self.config.ml_framework_build_label_str,
        self.config.execution_label_str, self.config.platform_name_str,
        self.config.system_name_str, self.config.output_gcs_url_str,
        benchmark_result, self.config.get_env_vars(), self.config.get_flags(),
        site_package_info, method_has_exception)
    report_utils.upload_execution_summary(
        self.config.bigquery_project_name_str,
        self.config.bigquery_dataset_table_name_str, execution_summary)
    logging.info('Benchmark execution for %s completed with summary:\n %s',
                 benchmark_method, json.dumps(execution_summary, indent=2))
    utils.maybe_upload_to_gcs(output_dir, self.config.output_gcs_url_str)
    logging.getLogger().removeHandler(filehandler)

    self.benchmark_execution_time[benchmark_method] = {}
    self.benchmark_execution_time[benchmark_method]['benchmark_time'] = upload_timestamp - start_timestamp  # pylint: disable=line-too-long
    self.benchmark_execution_time[benchmark_method]['upload_time'] = time.time() - upload_timestamp  # pylint: disable=line-too-long

  print('Benchmark execution time in seconds by operation:\n {}'.format(
      json.dumps(self.benchmark_execution_time, indent=2)))
  print('benchmark success results:\n{}'.format(
      json.dumps(benchmark_success_results, indent=2)))

  if has_exception:
    sys.exit(1)
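# A minimal sketch, assuming utils.maybe_upload_to_gcs() used above shells out
# to the gsutil CLI and is a no-op when no GCS url is configured; the actual
# helper is not shown in this section.
import logging
import subprocess


def maybe_upload_to_gcs_sketch(local_dir, output_gcs_url):
  """Copies local_dir to output_gcs_url via gsutil when a url is configured."""
  if not output_gcs_url:
    logging.info('Skipping GCS upload because no GCS url is configured')
    return
  subprocess.check_call(
      ['gsutil', '-m', 'cp', '-r', local_dir, output_gcs_url])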