def save(self, output_descriptor):
    """Serialize the benchmarks held by this instance into a (compressed) JSON file.

    Args:
        output_descriptor (str): A file name.
    """
    payload = {'data': self.__benchmarks}
    IOUtils.write_json(output_descriptor, payload)
def build_tensorflow_human_labels(imagenet_dir, human_labels_file):
    """Builds a textual file mapping each synset to its human-readable label.

    Each output line has the form ``<synset>\\t<human_labels>``.

    Args:
        imagenet_dir: Unused here; kept for signature consistency with the
            other ``build_*`` helpers in this module.
        human_labels_file (str): Output file name; parent dirs are created.
    """
    # NOTE: the previous docstring ("one synset on a line") was copy-pasted
    # from build_tensorflow_synsets and did not match what this writes.
    IOUtils.mkdirf(human_labels_file)
    labels = ImageNetTools.get_labels()
    with open(human_labels_file, 'w') as fobj:
        for label in labels:
            fobj.write("%s\t%s\n" % (label, labels[label]['human_labels']))
def run(self):
    """Runs subprocess with Popen.

    This method must not be called directly. Use blocking
    :py:meth:`~dlbs.Worker.work` method instead.

    Sets ``self.ret_code`` to the process exit code, or to -1 on failure.
    """
    try:
        # Dump parameters to a log file or to standard output.
        DictUtils.ensure_exists(self.params, 'exp.log_file', default_value='')
        if self.params['exp.log_file'].strip() == '':
            self.params['exp.log_file'] = '/dev/stdout'
        IOUtils.mkdirf(self.params['exp.log_file'])
        with open(self.params['exp.log_file'], 'a+') as log_file:
            self.__dump_parameters(log_file)
        # This is where we launch the process. Keep in mind that the log file
        # that's supposed to be created is exp.log_file or exp_log_file in the
        # script. Other output of the launching script is echoed by this
        # Python code to standard output.
        self.process = subprocess.Popen(self.command, universal_newlines=True,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT,
                                        env=self.environ)
        while True:
            output = self.process.stdout.readline()
            if output == '' and self.process.poll() is not None:
                break
            if output:
                sys.stdout.write(output)
                sys.stdout.flush()
        self.ret_code = self.process.poll()
    except Exception as err:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('Exception has been caught for experiment %s: %s',
                        self.params.get('exp.id'), str(err))
        logging.warning(traceback.format_exc())
        self.ret_code = -1
def build_tensorflow_synsets(imagenet_dir, synset_file):
    """Builds a textual file with one synset on a line"""
    IOUtils.mkdirf(synset_file)
    synsets = ImageNetTools.get_labels()
    with open(synset_file, 'w') as fobj:
        fobj.writelines("%s\n" % synset for synset in synsets)
def build_mxnet_labels(imagenet_dir, labels_file):
    """Generates a textual file with the following content:
    0   45  n02093256/n02093256_3032.JPEG
    1   45  n02093256/n02093256_3353.JPEG
    ... image_index image_class_label image_path
    """
    IOUtils.mkdirf(labels_file)
    image_files = ImageNetTools.get_image_files(imagenet_dir)
    label_map = ImageNetTools.get_labels()
    with open(labels_file, 'w') as fobj:
        for index, image_file in enumerate(image_files):
            synset, file_name, file_info = ImageNetTools.get_file_info(image_file, label_map)
            fobj.write("%d\t%d\t%s/%s\n" % (index, file_info['label'], synset, file_name))
def build_caffe_labels(imagenet_dir, labels_file):
    """Generates a textual file with the following content:
    img_0000.jpeg 1
    img_0001.jpeg 0
    ... mapping image file name to its class label
    """
    IOUtils.mkdirf(labels_file)
    image_files = ImageNetTools.get_image_files(imagenet_dir)
    label_map = ImageNetTools.get_labels()
    with open(labels_file, 'w') as fobj:
        for image_file in image_files:
            synset, file_name, file_info = ImageNetTools.get_file_info(image_file, label_map)
            fobj.write("%s/%s %d\n" % (synset, file_name, file_info['label']))
def main():
    """Does all log parsing work.

    Parses benchmark log files and writes the result either to standard
    output or to one or more JSON files (optionally gzip-compressed and/or
    split into chunks of 'benchmarks_per_file' benchmarks each).
    """
    opts = parse_args()
    files = IOUtils.gather_files(opts['inputs'], "*.log", opts['recursive'])
    succeeded, failed = LogParser.parse_log_files(files, opts)

    def _dump_data(file_name, opts, data):
        # BUG FIX: gzip must be opened in text mode ('wt'): json.dump writes
        # str objects, and gzip.open(..., 'wb') accepts only bytes on
        # Python 3, raising TypeError.
        with gzip.open(file_name, 'wt') if opts['_gz'] is True else open(file_name, 'w') as file_obj:
            json.dump({'data': data}, file_obj, indent=4)

    if opts['output_file'] is None:
        json.dump(succeeded, sys.stdout, indent=4, sort_keys=True)
        print("")
    else:
        IOUtils.mkdirf(opts['output_file'])
        output_files = []
        if len(failed) > 0:
            _dump_data(opts['_failed_file'], opts, failed)
            output_files.append(opts['_failed_file'])
        num_benchmarks = len(succeeded)
        if opts['num_output_files'] is not None:
            # Derive per-file benchmark count from the requested file count.
            opts['benchmarks_per_file'] = int(math.ceil(float(num_benchmarks) / opts['num_output_files']))
        if opts['benchmarks_per_file'] is not None:
            file_index = 0
            while True:
                start_index = file_index * opts['benchmarks_per_file']
                end_index = min(start_index + opts['benchmarks_per_file'], num_benchmarks)
                file_name = IOUtils.get_non_existing_file(
                    "%s_%d.%s" % (opts['_output_file_without_ext'], file_index, opts['_ext'])
                )
                _dump_data(file_name, opts, succeeded[start_index:end_index])
                output_files.append(file_name)
                if end_index >= num_benchmarks:
                    break
                file_index += 1
        else:
            _dump_data(opts['output_file'], opts, succeeded)
            output_files.append(opts['output_file'])
        print("Log parser summary.")
        print("Following files have been created:")
        json.dump(output_files, sys.stdout, indent=4, sort_keys=True)
        print("")
def compute(log_dir, recursive):
    """ Finds files and compute experiments' statistics.

    :param std log_dir: Directory to search files for.
    :param bool recursive: If True, directory will be searched recursively.
    :return: Dictionary with experiment statistics.
    """
    files = IOUtils.find_files(log_dir, "*.log", recursive)
    exps = LogParser.parse_log_files(files)

    def _get(exp, key, default=''):
        # Failed benchmarks may not carry all expected parameters.
        return exp[key] if key in exp else default

    stats = {
        'num_log_files': len(files),
        'num_failed_exps': 0,
        'num_successful_exps': 0,
        'failed_exps': {}
    }
    for exp in exps:
        time_val = str(exp['results.time']).strip() if 'results.time' in exp else ''
        if not time_val:
            stats['num_failed_exps'] += 1
            # BUG FIX: previously indexed exp['exp.id'] / exp['exp.phase'] /
            # exp['exp.log_file'] / exp['exp.framework_title'] directly, which
            # raised KeyError for malformed experiments missing those keys.
            stats['failed_exps'][_get(exp, 'exp.id', 'UNKNOWN_ID')] = {
                'msg': 'No %s time found in log file.' % _get(exp, 'exp.phase', 'PHASE_UNKNOWN'),
                'log_file': _get(exp, 'exp.log_file', 'LOG_FILE_UNKNOWN'),
                'phase': _get(exp, 'exp.phase', 'PHASE_UNKNOWN'),
                'framework_title': _get(exp, 'exp.framework_title', 'FRAMEWORK_TITLE_UNKNOWN')
            }
        else:
            stats['num_successful_exps'] += 1
    return stats
def get_selector(query):
    """Returns a callable object that returns true when `query` matches dictionary.

    Args:
        query: An object that specifies the query. It can be one of the following:
            - A string: parsed as JSON if possible, otherwise treated as a file
              name of a JSON file. The resulting object must be a dict or list.
            - A list or dict: wrapped into a function that calls the match
              method of DictUtils.
            - A callable object: returned as is.

    Returns:
        Callable object.
    """
    # Strings: try JSON parsing first, then fall back to reading a JSON file.
    if isinstance(query, Six.string_types):
        try:
            query = json.loads(query)
        except ValueError:
            query = IOUtils.read_json(query)
    if isinstance(query, (list, dict)):
        constraints = query

        def _match(bench):
            return DictUtils.match(bench, constraints, policy='strict')
        return _match
    if callable(query):
        return query
    raise ValueError("Invalid type of object that holds parameters (%s)" % type(query))
def parse(inputs, recursive=False, ignore_errors=False):
    """Parse benchmark log files (*.log).

    Args:
        inputs: Path specifiers of where to search for log files.
        recursive (bool): If true, parse directories found in `inputs` recursively.
        ignore_errors (bool): If true, ignore errors associated with parsing parameter values.

    Returns:
        Instance of this class.
    """
    inputs = inputs if isinstance(inputs, list) else [inputs]
    log_files = set()
    for file_path in inputs:
        if os.path.isdir(file_path):
            # BUG FIX: previously passed the whole `inputs` list here, so
            # every directory entry triggered a rescan of all inputs
            # (quadratic work, masked only by the set's deduplication).
            log_files.update(IOUtils.gather_files([file_path], "*.log", recursive))
        elif file_path.endswith('.log'):
            log_files.add(file_path)
    benchmarks = []
    for log_file in log_files:
        parameters = {}
        with OpenFile(log_file, 'r') as logfile:
            # 'must_match' is set to False: not every line in a log file has
            # to match the key-value pattern.
            DictUtils.add(
                parameters, logfile,
                pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                must_match=False, ignore_errors=ignore_errors
            )
        benchmarks.append(parameters)
    return BenchData(benchmarks, create_copy=False)
def action_benchdb(self):
    """Find benchmark archives, merge their contents and pickle the result."""
    print("Searching for benchmark archives ...")
    archives = []
    for pattern in ('*.tgz', '*.tar.gz', '*.json.gz'):
        archives.extend(IOUtils.find_files(self.__args['inputs'][0], pattern, recursively=True))
    print(" found {} benchmark files.".format(len(archives)))

    print("Parsing benchmark archives ...")
    merged = {}
    for archive in archives:
        BenchData.merge_benchmarks(merged, BenchData.load(archive).as_dict(key_len=5))
        print(" done [{}]".format(archive))
    print(" found {} benchmarks.".format(len(merged)))

    print("Serializing benchmarks ...")
    # NOTE(review): output path is hard-coded to /dev/shm -- Linux-only.
    with open('/dev/shm/benchmark_db.pickle', 'wb') as handle:
        pickle.dump(merged, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(" database generation completed.")
def load(inputs, **kwargs):
    """Load benchmark data (parsed from log files) from a JSON file.

    A file name is a JSON file that contains object with 'data' field. This
    field is a list with dictionaries, each dictionary contains parameters
    for one benchmark: {"data":[{...}, {...}, {...}]}

    Args:
        inputs (str): File name of a JSON (*.json) or a compressed JSON
            (.json.gz) file; also accepts CSV files, compressed tarballs of
            raw *.log files, or anything BenchData.parse understands.

    Returns:
        Instance of this class.
    """
    # JSON file (*.json / *.json.gz) with a top-level {'data': [...]} object.
    is_json_file = IOUtils.is_json_file(inputs)
    if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
        is_json_file = IOUtils.is_json_file(inputs[0])
        inputs = inputs[0] if is_json_file else inputs
    if is_json_file:
        benchmarks = IOUtils.read_json(inputs, check_extension=True)
        if 'data' not in benchmarks:
            benchmarks = {'data': []}
            print("[WARNING]: No benchmark data found in '{}'".format(inputs))
        return BenchData(benchmarks['data'], create_copy=False)
    # CSV file: each row is one benchmark.
    is_csv_file = IOUtils.is_csv_file(inputs)
    if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
        is_csv_file = IOUtils.is_csv_file(inputs[0])
        inputs = inputs[0] if is_csv_file else inputs
    if is_csv_file:
        with OpenFile(inputs, 'r') as fobj:
            reader = csv.DictReader(fobj)
            benchmarks = list(reader)
        return BenchData(benchmarks, create_copy=False)
    # Compressed tarball containing raw *.log files.
    is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
    if not is_compressed_tarball and isinstance(inputs, list) and len(inputs) == 1:
        # BUG FIX: this previously called IOUtils.is_json_file(inputs[0])
        # (copy/paste error), so a single-element list holding a tarball was
        # never recognized as one.
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
        inputs = inputs[0] if is_compressed_tarball else inputs
    if is_compressed_tarball:
        benchmarks = []
        with tarfile.open(inputs, "r:gz") as archive:
            for member in archive.getmembers():
                if member.isfile() and member.name.endswith('.log'):
                    log_file = archive.extractfile(member)
                    if log_file is not None:
                        parameters = {}
                        DictUtils.add(
                            parameters, log_file,
                            pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                            must_match=False, ignore_errors=True)
                        benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
    # Fall back to parsing raw log files / directories.
    return BenchData.parse(inputs, **kwargs)
def load_data(**kwargs):
    """Load benchmarks from a directory of log files, a log file, or a JSON file.

    Args:
        **kwargs: Must contain 'input' (path) and, when the input is a
            directory or a log file, 'recursive' (bool).

    Returns:
        list: Benchmark dictionaries.

    Raises:
        ValueError: If 'input' is neither a directory, a *.log file, nor a
            *.json / *.json.gz file.
    """
    is_dir = os.path.isdir(kwargs['input'])
    is_file = os.path.isfile(kwargs['input'])
    is_log_file = is_file and kwargs['input'].endswith('.log')
    is_json_file = is_file and (kwargs['input'].endswith('.json') or
                                kwargs['input'].endswith('.json.gz'))
    if is_dir or is_log_file:
        # BUG FIX: this branch previously referenced an undefined name
        # 'config' (config['input'] / config['recursive']), raising NameError.
        files = IOUtils.find_files(kwargs['input'], "*.log", kwargs['recursive'])
        benchmarks, failed_benchmarks = LogParser.parse_log_files(files)
        # Failed benchmarks are kept as well -- callers filter them later.
        benchmarks.extend(failed_benchmarks)
    elif is_json_file:
        benchmarks = IOUtils.read_json(kwargs['input'])
        benchmarks = benchmarks['data']
    else:
        raise ValueError("Invalid input descriptor: {}".format(kwargs['input']))
    return benchmarks
def compute(log_dir, recursive):
    """ Finds files and compute experiments' statistics.

    :param std log_dir: Directory to search files for.
    :param bool recursive: If True, directory will be searched recursively.
    :return: Dictionary with experiment statistics.
    """
    files = IOUtils.find_files(log_dir, "*.log", recursive)
    benchmarks, failed_benchmarks = LogParser.parse_log_files(files)

    def _get(d, key, val=''):
        # Missing keys fall back to a placeholder value.
        return d[key] if key in d else val

    stats = {
        'num_log_files': len(files),
        'num_failed_exps': 0,
        'num_successful_exps': 0,
        'failed_exps': {},
        'node_ids': set(),
        'node_titles': set(),
        'gpu_titles': set()
    }
    for bench in benchmarks:
        if 'results.time' in bench:
            time_val = str(bench['results.time']).strip()
        else:
            time_val = ''
        if not time_val:
            stats['num_failed_exps'] += 1
            if 'exp.id' not in bench:
                print("[ERROR] No exp.id found in benchmark (%s)" % str(bench))
                continue
            stats['failed_exps'][bench['exp.id']] = {
                'msg': 'No %s time found in log file.' % _get(bench, 'exp.phase', 'PHASE_UNKNOWN'),
                'log_file': _get(bench, 'exp.log_file', 'LOG_FILE_UNKNOWN'),
                'phase': _get(bench, 'exp.phase', 'PHASE_UNKNOWN'),
                'framework_title': _get(bench, 'exp.framework_title', 'FRAMEWORK_TITLE_UNKNOWN')
            }
        else:
            stats['num_successful_exps'] += 1
        # Collect node/GPU metadata present in this benchmark.
        for param_key, stat_key in (('exp.node_id', 'node_ids'),
                                    ('exp.node_title', 'node_titles'),
                                    ('exp.gpu_title', 'gpu_titles')):
            if param_key in bench:
                stats[stat_key].add(bench[param_key])
    # Sets are not JSON-serializable; convert to lists.
    for stat_key in ('node_ids', 'node_titles', 'gpu_titles'):
        stats[stat_key] = list(stats[stat_key])
    return stats
def get_image_files(folder, shuffle=True, num_files=-1):
    """ Get *.JPEG files in folder. Shuffle files and return at most num_files files. """
    # Recursively collect all JPEG images under 'folder'.
    files = IOUtils.find_files(folder, '*.JPEG', recursively=True)
    if shuffle:
        random.shuffle(files)
    # A non-positive num_files means "return everything found".
    if 0 < num_files < len(files):
        files = files[:num_files]
    return files
def main():
    """Does all log parsing work.

    Parses the given log files (or scans --log-dir), optionally filters out
    results that are missing any of the requested --keys (--strict), and
    writes the summary to --summary-file or standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--summary-file', type=str, required=False, default=None,
                        help='Write summary of experiments into this JSON file.')
    parser.add_argument('--log-dir', type=str, required=False, default=None,
                        help='Scan this folder for *.log files. Scan recursively if --recursive is set.')
    parser.add_argument('--recursive', required=False, default=False, action='store_true',
                        help='Scan --log-dir folder recursively for log files.')
    parser.add_argument('--keys', nargs='*', required=False,
                        help='Parameters to extract from log files. If not set or empty, all parameters are returned.')
    parser.add_argument('--strict', action='store_true', default=False,
                        help='If set, serialize only those results that contain all keys specified with --keys arg.')
    parser.add_argument('log_files', nargs='*', help='Log files to parse')
    args = parser.parse_args()

    files = []
    if len(args.log_files) > 0:
        files = args.log_files
    elif args.log_dir is not None:
        files = IOUtils.find_files(args.log_dir, "*.log", args.recursive)
    params = LogParser.parse_log_files(files, keys=args.keys)

    # BUG FIX: when --keys is omitted args.keys is None, so len(args.keys)
    # raised TypeError; a truthiness check covers both None and [].
    if args.strict and args.keys:
        params = [param for param in params
                  if all(key in param for key in args.keys)]

    summary = {"data": params}
    if args.summary_file is None:
        json.dump(summary, sys.stdout, indent=4, sort_keys=True)
        print("")
    else:
        DictUtils.dump_json_to_file(summary, args.summary_file)
def main():
    """Entry point when invoking this script from a command line.

    Parses log files / JSON inputs, builds chart series according to the
    --xparam/--yparam/--series arguments, and optionally writes the series
    JSON (--series-file) and/or plots the chart (--chart-file).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('inputs', nargs='*', help='Log directory or a JSON file')
    parser.add_argument(
        '--recursive', required=False, default=False, action='store_true',
        help='If input is folder, scan it recursively for log files.')
    parser.add_argument('--xparam', type=str, required=True, default=None,
                        help='A parameter that is associated with x axis.')
    parser.add_argument('--yparam', type=str, required=True, default=None,
                        help='A parameter that is associated with y axis.')
    parser.add_argument('--series', type=str, required=True, default=None,
                        help='A json array with filters for series.')
    parser.add_argument(
        '--aggregation', type=str, required=True, default="avg",
        help='In case of multiple matches, use this to aggregate values (min, max, avg)')
    parser.add_argument('--chart_file', '--chart-file', type=str, required=False, default=None,
                        help='If present, write chart into this file.')
    parser.add_argument(
        '--series_file', '--series-file', type=str, required=False, default=None,
        help='If present, write series JSON data into this file.')
    parser.add_argument(
        '--chart_opts', '--chart-opts', type=str, required=False, default=None,
        help='If present, a json object specifying chart options.')
    parser.add_argument('--chart_type', '--chart-type', type=str, required=False, default='line',
                        help='Type of a chart ("line" or "bar").')
    parser.add_argument(
        '--baseline_xvalue', '--baseline-xvalue', type=str, required=False, default=None,
        help="A value that's used to normalize one series. Useful to plot speedup charts.")
    parser.add_argument(
        '--baseline_series', '--baseline-series', type=int, required=False, default=None,
        help="An index of a baseline series to use to normalize all series.")
    args = parser.parse_args()

    if len(args.inputs) == 0:
        raise ValueError("Must be at least one input ('--input')")

    # Parse log files and load benchmark data
    logfiles = []    # Original raw log files with benchmark data
    benchmarks = []  # Parsed benchmarks
    for input_path in args.inputs:
        if os.path.isdir(input_path):
            logfiles.extend(IOUtils.find_files(input_path, "*.log", args.recursive))
        elif os.path.isfile(input_path) and input_path.endswith(('.json', '.json.gz')):
            file_benchmarks = IOUtils.read_json(input_path)
            if 'data' in file_benchmarks and isinstance(file_benchmarks['data'], list):
                benchmarks.extend(file_benchmarks['data'])
            else:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning("Cannot parse file (%s). Invalid content.", input_path)
        else:
            logging.warning("Cannot parse file (%s). Unknown extension. ", input_path)
    if len(logfiles) > 0:
        benchmarks.extend(LogParser.parse_log_files(logfiles))
    else:
        logging.warning("No input log files have been found")
    if len(benchmarks) == 0:
        raise ValueError("No benchmarks have been loaded.")
    # Build data for series
    chart_data = SeriesBuilder.build(benchmarks, args)
    # Write it
    if args.series_file:
        # BUG FIX: previously passed the whole argparse namespace ('args') as
        # the file name instead of args.series_file (compare the analogous
        # dump_json_to_file call in the summary tool).
        DictUtils.dump_json_to_file(chart_data, args.series_file)
    # Plot it
    if args.chart_file:
        SeriesBuilder.plot(chart_data, args)
parser.add_argument('--log_file', '--log-file', type=str, required=False, default=None, help="Get batch statistics from this experiment.") parser.add_argument( '--recursive', required=False, default=False, action='store_true', help='Scan --log-dir folder recursively for log files.') args = parser.parse_args() if args.log_dir is not None: files = IOUtils.find_files(args.log_dir, "*.log", args.recursive) else: files = [] if args.log_file is not None: files.append(args.log_file) save_file = None if args.save_file is not None: save_file = args.save_file exps = LogParser.parse_log_files(files) for exp in exps: key = 'results.time_data' if key not in exp: continue times = exp[key]
def action_summary(self):
    """Write the data summary as JSON to the configured output file."""
    summary = self.__data.summary()
    IOUtils.write_json(self.__args['output'], summary, check_extension=False)