def compute(log_dir, recursive):
    """Find log files and compute experiments' statistics.

    An experiment counts as successful when its 'results.time' value is
    present and non-blank; otherwise it is recorded as failed.

    :param str log_dir: Directory to search files for.
    :param bool recursive: If True, directory will be searched recursively.
    :return: Dictionary with experiment statistics: 'num_log_files',
             'num_failed_exps', 'num_successful_exps' and 'failed_exps'
             (details of failed experiments keyed by experiment id).
    """
    files = IOUtils.find_files(log_dir, "*.log", recursive)
    exps = LogParser.parse_log_files(files)

    stats = {
        'num_log_files': len(files),
        'num_failed_exps': 0,
        'num_successful_exps': 0,
        'failed_exps': {}
    }
    for exp in exps:
        time_val = str(exp['results.time']).strip() if 'results.time' in exp else ''
        if time_val:
            stats['num_successful_exps'] += 1
            continue

        stats['num_failed_exps'] += 1
        # A failed experiment is the one most likely to have an incomplete
        # parameter set, so none of the descriptive keys may be assumed present.
        if 'exp.id' not in exp:
            print("[ERROR] No exp.id found in experiment (%s)" % str(exp))
            continue
        phase = exp.get('exp.phase', 'PHASE_UNKNOWN')
        stats['failed_exps'][exp['exp.id']] = {
            'msg': 'No %s time found in log file.' % phase,
            'log_file': exp.get('exp.log_file', 'LOG_FILE_UNKNOWN'),
            'phase': phase,
            'framework_title': exp.get('exp.framework_title', 'FRAMEWORK_TITLE_UNKNOWN')
        }
    return stats
def action_benchdb(self):
    """Build a pickled benchmark database from benchmark archives.

    Searches the first input path recursively for '*.tgz', '*.tar.gz' and
    '*.json.gz' files, merges the benchmarks loaded from every archive into
    one dictionary and pickles it to '/dev/shm/benchmark_db.pickle'.
    Progress is reported on standard output.
    """
    # Stage 1: locate archives.
    print("Searching for benchmark archives ...")
    search_root = self.__args['inputs'][0]
    archives = [
        path
        for pattern in ('*.tgz', '*.tar.gz', '*.json.gz')
        for path in IOUtils.find_files(search_root, pattern, recursively=True)
    ]
    print(" found {} benchmark files.".format(len(archives)))

    # Stage 2: load every archive and merge it into a single dictionary.
    merged = {}
    print("Parsing benchmark archives ...")
    for archive in archives:
        loaded = BenchData.load(archive).as_dict(key_len=5)
        BenchData.merge_benchmarks(merged, loaded)
        print(" done [{}]".format(archive))
    print(" found {} benchmarks.".format(len(merged)))

    # Stage 3: serialize the merged database.
    print("Serializing benchmarks ...")
    with open('/dev/shm/benchmark_db.pickle', 'wb') as db_file:
        pickle.dump(merged, db_file, protocol=pickle.HIGHEST_PROTOCOL)

    print(" database generation completed.")
def compute(log_dir, recursive):
    """Find log files and compute benchmark statistics.

    A benchmark is successful when it has a non-blank 'results.time' value.
    Failed benchmarks are described in 'failed_exps' keyed by their 'exp.id';
    a failed benchmark without 'exp.id' is reported on standard output and
    skipped (it is still counted as failed).

    :param str log_dir: Directory to search files for.
    :param bool recursive: If True, directory will be searched recursively.
    :return: Dictionary with experiment statistics, including lists of the
             distinct node ids, node titles and GPU titles encountered.
    """
    def _lookup(record, name, fallback=''):
        """Return record[name], or `fallback` when `name` is absent."""
        return fallback if name not in record else record[name]

    log_files = IOUtils.find_files(log_dir, "*.log", recursive)
    benchmarks, _failed = LogParser.parse_log_files(log_files)

    stats = {
        'num_log_files': len(log_files),
        'num_failed_exps': 0,
        'num_successful_exps': 0,
        'failed_exps': {},
        'node_ids': set(),
        'node_titles': set(),
        'gpu_titles': set()
    }
    # (benchmark parameter, statistics bucket) pairs collected as unique values.
    tracked = (
        ('exp.node_id', 'node_ids'),
        ('exp.node_title', 'node_titles'),
        ('exp.gpu_title', 'gpu_titles'),
    )

    for bench in benchmarks:
        if 'results.time' in bench:
            time_val = str(bench['results.time']).strip()
        else:
            time_val = ''

        if time_val:
            stats['num_successful_exps'] += 1
        else:
            stats['num_failed_exps'] += 1
            if 'exp.id' not in bench:
                print("[ERROR] No exp.id found in benchmark (%s)" % str(bench))
                continue
            phase = _lookup(bench, 'exp.phase', 'PHASE_UNKNOWN')
            stats['failed_exps'][bench['exp.id']] = {
                'msg': 'No %s time found in log file.' % phase,
                'log_file': _lookup(bench, 'exp.log_file', 'LOG_FILE_UNKNOWN'),
                'phase': phase,
                'framework_title': _lookup(bench, 'exp.framework_title',
                                           'FRAMEWORK_TITLE_UNKNOWN')
            }

        for param, bucket in tracked:
            if param in bench:
                stats[bucket].add(bench[param])

    # Convert the sets to lists before returning.
    for bucket in ('node_ids', 'node_titles', 'gpu_titles'):
        stats[bucket] = list(stats[bucket])
    return stats
def get_image_files(folder, shuffle=True, num_files=-1):
    """Collect *.JPEG files found recursively under `folder`.

    :param folder: Directory to scan.
    :param bool shuffle: If True, shuffle the found files in place.
    :param int num_files: Upper bound on the number of returned files. A
                          non-positive value, or a value not smaller than the
                          number of files found, returns every file.
    :return: List of file names.
    """
    images = IOUtils.find_files(folder, '*.JPEG', recursively=True)
    if shuffle:
        random.shuffle(images)
    # Truncate only when the limit is positive and actually cuts the list.
    if 0 < num_files < len(images):
        return images[:num_files]
    return images
def load_data(**kwargs):
    """Load benchmarks from a log directory, a single log file or a JSON file.

    :param kwargs: Must contain 'input' - a path to a directory, a '*.log'
                   file, or a '*.json' / '*.json.gz' file. May contain
                   'recursive' (defaults to False), used when 'input' is a
                   directory.
    :return: Benchmarks. For log inputs, the parsed benchmarks extended with
             the failed ones; for JSON inputs, the value stored under 'data'.
    :raises ValueError: If 'input' is none of the supported kinds.
    """
    input_path = kwargs['input']
    is_dir = os.path.isdir(input_path)
    is_file = os.path.isfile(input_path)
    is_log_file = is_file and input_path.endswith('.log')
    is_json_file = is_file and input_path.endswith(('.json', '.json.gz'))

    if is_dir or is_log_file:
        if is_dir:
            # The original read these values from an undefined `config` name;
            # the function's own keyword arguments are the source of truth.
            files = IOUtils.find_files(input_path, "*.log",
                                       kwargs.get('recursive', False))
        else:
            # A single log file is parsed as is instead of being searched
            # as if it were a directory.
            files = [input_path]
        benchmarks, failed_benchmarks = LogParser.parse_log_files(files)
        benchmarks.extend(failed_benchmarks)
    elif is_json_file:
        benchmarks = IOUtils.read_json(input_path)['data']
    else:
        raise ValueError("Invalid input descriptor: {}".format(input_path))
    return benchmarks
def main():
    """Does all log parsing work.

    Parses command line arguments, collects log files (positional arguments
    take precedence over --log-dir), parses them and writes the result as
    ``{"data": [...]}`` either to standard output or to --summary-file.
    With --strict and a non-empty --keys list, only results containing every
    requested key are kept.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--summary-file', type=str, required=False, default=None,
                        help='Write summary of experiments into this JSON file.')
    parser.add_argument('--log-dir', type=str, required=False, default=None,
                        help='Scan this folder for *.log files. Scan recursively if --recursive is set.')
    parser.add_argument('--recursive', required=False, default=False, action='store_true',
                        help='Scan --log-dir folder recursively for log files.')
    parser.add_argument('--keys', nargs='*', required=False,
                        help='Parameters to extract from log files. If not set or empty, all parameters are returned.')
    parser.add_argument('--strict', action='store_true', default=False,
                        help='If set, serialzie only those results that contain all keys specified with --keys arg.')
    parser.add_argument('log_files', nargs='*', help='Log files to parse')
    args = parser.parse_args()

    files = []
    if len(args.log_files) > 0:
        files = args.log_files
    elif args.log_dir is not None:
        files = IOUtils.find_files(args.log_dir, "*.log", args.recursive)

    params = LogParser.parse_log_files(files, keys=args.keys)

    # args.keys is None when --keys is not given, so test truthiness instead
    # of calling len() on it (which raised TypeError with --strict alone).
    if args.strict and args.keys:
        params = [param for param in params
                  if all(key in param for key in args.keys)]

    summary = {"data": params}
    if args.summary_file is None:
        json.dump(summary, sys.stdout, indent=4, sort_keys=True)
        print("")
    else:
        DictUtils.dump_json_to_file(summary, args.summary_file)
parser.add_argument('--log_file', '--log-file', type=str, required=False, default=None, help="Get batch statistics from this experiment.") parser.add_argument( '--recursive', required=False, default=False, action='store_true', help='Scan --log-dir folder recursively for log files.') args = parser.parse_args() if args.log_dir is not None: files = IOUtils.find_files(args.log_dir, "*.log", args.recursive) else: files = [] if args.log_file is not None: files.append(args.log_file) save_file = None if args.save_file is not None: save_file = args.save_file exps = LogParser.parse_log_files(files) for exp in exps: key = 'results.time_data' if key not in exp: continue times = exp[key]
def main():
    """Entry point when invoking this script from a command line.

    Loads benchmarks from the given inputs (directories are scanned for
    '*.log' files, '*.json' / '*.json.gz' files are read directly), builds
    series data and optionally writes it to --series-file and/or plots it
    to --chart-file.

    :raises ValueError: If no inputs are given or no benchmarks are loaded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('inputs', nargs='*',
                        help='Log directory or a JSON file')
    parser.add_argument('--recursive', required=False, default=False, action='store_true',
                        help='If input is folder, scan it recursively for log files.')
    parser.add_argument('--xparam', type=str, required=True, default=None,
                        help='A parameter that is associated with x axis.')
    parser.add_argument('--yparam', type=str, required=True, default=None,
                        help='A parameter that is associated with y axis.')
    parser.add_argument('--series', type=str, required=True, default=None,
                        help='A json array with filters for series.')
    parser.add_argument('--aggregation', type=str, required=True, default="avg",
                        help='In case of multiple matches, use this to aggregate values (min, max, avg)')
    parser.add_argument('--chart_file', '--chart-file', type=str, required=False, default=None,
                        help='If present, write chart into this file.')
    parser.add_argument('--series_file', '--series-file', type=str, required=False, default=None,
                        help='If present, write series JSON data into this file.')
    parser.add_argument('--chart_opts', '--chart-opts', type=str, required=False, default=None,
                        help='If present, a json object specifying chart options.')
    parser.add_argument('--chart_type', '--chart-type', type=str, required=False, default='line',
                        help='Type of a chart ("line" or "bar").')
    parser.add_argument('--baseline_xvalue', '--baseline-xvalue', type=str, required=False, default=None,
                        help="A value that's used to normalize one series. Useful to plot speedup charts.")
    parser.add_argument('--baseline_series', '--baseline-series', type=int, required=False, default=None,
                        help="An index of a baseline series to use to normalize all series.")
    args = parser.parse_args()

    if not args.inputs:
        raise ValueError("Must be at least one input ('--input')")

    # Parse log files and load benchmark data
    logfiles = []    # Original raw log files with benchmark data
    benchmarks = []  # Parsed benchmarks
    for input_path in args.inputs:
        if os.path.isdir(input_path):
            logfiles.extend(IOUtils.find_files(input_path, "*.log", args.recursive))
        elif os.path.isfile(input_path) and input_path.endswith(('.json', '.json.gz')):
            file_benchmarks = IOUtils.read_json(input_path)
            if 'data' in file_benchmarks and isinstance(file_benchmarks['data'], list):
                benchmarks.extend(file_benchmarks['data'])
            else:
                # logging.warn is a deprecated alias; use logging.warning.
                logging.warning("Cannot parse file (%s). Invalid content.", input_path)
        else:
            logging.warning("Cannot parse file (%s). Unknown extension. ", input_path)

    if logfiles:
        benchmarks.extend(LogParser.parse_log_files(logfiles))
    else:
        logging.warning("No input log files have been found")
    if not benchmarks:
        raise ValueError("No benchmarks have been loaded.")

    # Build data for series
    chart_data = SeriesBuilder.build(benchmarks, args)
    # Write it. The destination is the --series-file path, not the whole
    # argparse namespace that the original passed by mistake.
    if args.series_file:
        DictUtils.dump_json_to_file(chart_data, args.series_file)
    # Plot it
    if args.chart_file:
        SeriesBuilder.plot(chart_data, args)