def get_memory_usage_graphics():
    """
    Serve the graphic view of memory usage for one device.

    Reads the query parameters ``dir``, ``device_id`` (default '0') and
    ``device_type`` (default 'ascend') from the current request.

    Returns:
        Response, the result of the 'memory_usage' analyser's graphics query.

    Raises:
        ParamValueError: If device_type is anything other than 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/memory-graphics
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = args.get("device_id", default='0')
    # Called only as a check: the converted value is discarded and the
    # analyser below receives the raw string.
    to_int(device_id, 'device_id')

    device_type = args.get("device_type", default='ascend')
    if device_type != 'ascend':
        logger.info(
            "Invalid device_type, Memory Usage only supports Ascend for now.")
        raise ParamValueError("Invalid device_type.")

    memory_analyser = AnalyserFactory.instance().get_analyser(
        'memory_usage', profiler_dir_abs, device_id)
    return memory_analyser.get_memory_usage_graphics(device_type)
def get_memory_usage_breakdowns():
    """
    Serve the per-node memory breakdowns.

    Reads the query parameters ``dir``, ``device_id`` (default '0'),
    ``device_type`` (default 'ascend'), ``graph_id`` (default '0') and
    ``node_id`` (default '0') from the current request.

    Returns:
        Response, the result of the 'memory_usage' analyser's breakdown query.

    Raises:
        ParamValueError: If device_type is anything other than 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/memory-breakdowns
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    device_type = args.get("device_type", default='ascend')
    graph_id = args.get("graph_id", default='0')
    # node_id, unlike device_id, is forwarded in its converted form.
    node_id = to_int(args.get("node_id", default='0'), 'node_id')

    if device_type != 'ascend':
        logger.error(
            "Invalid device_type, Memory Usage only supports Ascend for now.")
        raise ParamValueError("Invalid device_type.")

    memory_analyser = AnalyserFactory.instance().get_analyser(
        'memory_usage', profiler_dir_abs, device_id)
    return memory_analyser.get_memory_usage_breakdowns(
        device_type, graph_id, node_id)
def get_timeline_summary():
    """
    Serve the timeline summary for one device.

    Reads the query parameters ``dir``, ``device_id`` (default '0') and
    ``device_type`` (default 'ascend') from the current request.

    Returns:
        Response, the result of the 'timeline' analyser's summary query.

    Raises:
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = args.get("device_id", default='0')
    # Check only; the converted value is not used.
    to_int(device_id, 'device_id')

    device_type = args.get("device_type", default='ascend')
    if device_type not in ('gpu', 'ascend'):
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir_abs, device_id)
    return timeline_analyser.get_timeline_summary(device_type)
def get_timeline_detail():
    """
    Serve the detailed timeline for one device.

    Reads the query parameters ``dir``, ``device_id`` (default '0'),
    ``device_type`` (default 'ascend') and ``scope_name_num`` (default '0')
    from the current request.

    Returns:
        Response, the display timeline serialized as JSON.

    Raises:
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = args.get("device_id", default='0')
    # Check only; the converted value is not used.
    to_int(device_id, 'device_id')

    device_type = args.get("device_type", default='ascend')
    # scope_name_num is forwarded to the analyser exactly as received.
    scope_name_num = args.get("scope_name_num", default='0')

    if device_type not in ('gpu', 'ascend'):
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir_abs, device_id)
    return jsonify(
        timeline_analyser.get_display_timeline(device_type, scope_name_num))
def get_training_trace_graph():
    """
    Serve the training trace of a single step.

    Reads the query parameters ``dir``, ``type`` (default '0', used as the
    step id) and ``device_id`` (default '0') from the current request.

    Returns:
        Response, the step trace with 'summary' and 'point_info' added, as
        JSON. An empty JSON object is returned when creating the analyser
        raises ProfilerFileNotFoundException.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    step_id = to_int(args.get("type", default='0'), 'graph_type')
    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    try:
        step_analyser = AnalyserFactory.instance().get_analyser(
            'step_trace', profiler_dir_abs, device_id)
    except ProfilerFileNotFoundException:
        # No step trace file: answer with an empty object, not an error.
        return jsonify({})

    condition = {'filter_condition': {'mode': 'step', 'step_id': step_id}}
    trace = step_analyser.query(condition)
    trace['summary'] = step_analyser.summary
    trace['point_info'] = step_analyser.point_info
    return jsonify(trace)
def get_target_time_info():
    """
    Serve all the time information of the requested column.

    Reads the query parameters ``dir``, ``type`` (the column/proc name,
    checked with validate_ui_proc) and ``device_id`` (default '0') from the
    current request.

    Returns:
        Response, the query result with 'summary' added, as JSON.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/training-trace/target-time-info
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    proc_name = args.get("type")
    validate_ui_proc(proc_name)

    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    step_analyser = AnalyserFactory.instance().get_analyser(
        'step_trace', profiler_dir_abs, device_id)
    condition = {'filter_condition': {'mode': 'proc', 'proc_name': proc_name}}
    time_info = step_analyser.query(condition)
    time_info['summary'] = step_analyser.summary
    return jsonify(time_info)
def get_timeline_detail():
    """
    Serve the detailed timeline for one device.

    Reads the query parameters ``dir``, ``device_id`` (default '0') and
    ``device_type`` (default 'ascend') from the current request.

    Returns:
        Response, the display timeline serialized as JSON.

    Raises:
        ProfilerDirNotFoundException: If the normalized profiler path does
            not exist; the message carries the raw ``dir`` argument.
        ParamValueError: If device_type is neither 'gpu' nor 'ascend'.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline
    """
    args = request.args
    summary_dir = args.get("dir")
    profiler_dir = validate_and_normalize_profiler_path(
        summary_dir, settings.SUMMARY_BASE_DIR)
    if not os.path.exists(profiler_dir):
        raise ProfilerDirNotFoundException(msg=summary_dir)

    device_id = args.get("device_id", default='0')
    # Check only; the converted value is not used.
    to_int(device_id, 'device_id')

    device_type = args.get("device_type", default='ascend')
    if device_type not in ('gpu', 'ascend'):
        logger.info(
            "Invalid device_type, device_type should be gpu or ascend.")
        raise ParamValueError("Invalid device_type.")

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir, device_id)
    return jsonify(timeline_analyser.get_display_timeline(device_type))
def get_training_trace_graph():
    """
    Serve the training trace of a single step.

    Reads the query parameters ``dir``, ``type`` (default '0', used as the
    step id) and ``device_id`` (default '0') from the current request.

    Returns:
        Response, one of the following as JSON:
        an empty object when creating the analyser raises
        ProfilerFileNotFoundException;
        ``{'is_heterogeneous': True}`` when ``cpu_op_type_info_<device_id>.csv``
        is listed in the profiler directory;
        otherwise the step trace with 'summary', 'point_info' and
        ``'is_heterogeneous': False`` added.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    step_id = to_int(args.get("type", default='0'), 'graph_type')
    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    try:
        step_analyser = AnalyserFactory.instance().get_analyser(
            'step_trace', profiler_dir_abs, device_id)
    except ProfilerFileNotFoundException:
        # No step trace file: answer with an empty object, not an error.
        return jsonify({})

    condition = {'filter_condition': {'mode': 'step', 'step_id': step_id}}
    trace = step_analyser.query(condition)
    trace['summary'] = step_analyser.summary
    trace['point_info'] = step_analyser.point_info(step_id)
    trace['is_heterogeneous'] = False

    # In heterogeneous training scene, do not display step trace data:
    # the trace built above is dropped and only the flag is returned.
    cpu_op_marker = f"cpu_op_type_info_{device_id}.csv"
    if cpu_op_marker in os.listdir(profiler_dir_abs):
        return jsonify({'is_heterogeneous': True})

    return jsonify(trace)
def get_timeline_detail():
    """
    Serve the detailed timeline for one device.

    Reads the query parameters ``dir`` and ``device_id`` (default '0') from
    the current request.

    Returns:
        Response, the display timeline serialized as JSON.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline
    """
    args = request.args
    profiler_dir = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)

    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir, device_id)
    return jsonify(timeline_analyser.get_display_timeline())
def get_timeline_summary():
    """
    Serve the timeline summary for one device.

    Reads the query parameters ``dir`` and ``device_id`` (default '0') from
    the current request.

    Returns:
        Response, the result of the 'timeline' analyser's summary query.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
    """
    args = request.args
    profiler_dir = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)

    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir, device_id)
    return timeline_analyser.get_timeline_summary()
def get_flops_summary():
    """
    Serve the flops summary for one device.

    Reads the query parameters ``train_id`` and ``device_id`` (default '0')
    from the current request.

    Returns:
        Response, the result of the 'flops' analyser's summary query.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/flops-summary
    """
    args = request.args
    profiler_dir_abs = validate_and_normalize_profiler_path(
        args.get("train_id"), settings.SUMMARY_BASE_DIR)
    check_train_job_and_profiler_dir(profiler_dir_abs)

    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    flops_analyser = AnalyserFactory.instance().get_analyser(
        'flops', profiler_dir_abs, device_id)
    return flops_analyser.get_flops_summary()
def get_timeline_summary():
    """
    Serve the timeline summary for one device.

    Reads the query parameters ``dir`` and ``device_id`` (default '0') from
    the current request.

    Returns:
        Response, the result of the 'timeline' analyser's summary query.

    Raises:
        ProfilerDirNotFoundException: If the normalized profiler path does
            not exist.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
    """
    args = request.args
    profiler_dir = validate_and_normalize_profiler_path(
        args.get("dir"), settings.SUMMARY_BASE_DIR)
    if not os.path.exists(profiler_dir):
        raise ProfilerDirNotFoundException(
            msg='The profiler dir is not found!')

    device_id = args.get("device_id", default='0')
    # Check only; the analyser is handed the raw device_id string.
    to_int(device_id, 'device_id')

    timeline_analyser = AnalyserFactory.instance().get_analyser(
        'timeline', profiler_dir, device_id)
    return timeline_analyser.get_timeline_summary()