def build_cache(self, summary_file, target_variable, query):
    """Loads data from json file."""
    # Read the whole benchmark summary; OpenFile abstracts plain/compressed files.
    with OpenFile(summary_file) as file_obj:
        summary = json.load(file_obj)

    self.cache = {}
    self.nets = Set()
    self.batches = Set()
    self.devices = Set()

    for exp in summary['data']:
        # Skip experiments that do not report the metric we cache.
        if target_variable not in exp:
            print("target variable not in experiment, skipping")
            continue
        # Skip experiments that do not satisfy the user-provided query.
        if not DictUtils.match(exp, query, policy='strict'):
            continue
        # batch is an effective batch here
        net = exp['exp.model_title']
        device = exp['exp.gpus']
        batch = exp['exp.effective_batch']
        key = '{0}_{1}_{2}'.format(net, device, batch)
        self.cache[key] = float(exp[target_variable])
        self.nets.add(net)
        self.batches.add(int(batch))
        self.devices.add(str(device))

    # Freeze the collected axes as sorted lists; devices are ordered by the
    # length of their string representation (e.g. "0" < "0,1" < "0,1,2,3").
    self.nets = sorted(list(self.nets))
    self.batches = sorted(list(self.batches))
    self.devices = sorted(list(self.devices), key=len)
def test_match_5(self):
    """dlbs -> TestDictUtils::test_match_5 [Testing matching helpers #5]"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
    matches = {}
    # The regexp does not match 'bvlc_caffe', so with the strict policy the
    # match must fail and no capture groups may be recorded in `matches`.
    # NOTE: `assertEquals` is a deprecated alias (removed in Python 3.12),
    # replaced with `assertEqual`.
    self.assertEqual(
        DictUtils.match(dictionary, {'exp.framework': '([^_]+)D(.+)'},
                        policy='strict', matches=matches),
        False)
    self.assertEqual(len(matches), 0)
def test_match_6(self):
    """Test empty strings can match"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.data_dir': ""}
    matches = {}
    # Blank / whitespace-only patterns must not match a non-empty value, and
    # a failed match must not record any capture groups.
    # NOTE: deprecated `assertEquals` alias replaced with `assertEqual` for
    # consistency with the assertions below (removed in Python 3.12).
    for val in ('', ' ', ' ', ' '):
        self.assertEqual(
            DictUtils.match(dictionary, {'exp.framework': val},
                            policy='strict', matches=matches),
            False)
        self.assertEqual(len(matches), 0)
    # An empty pattern does match an empty value; the full match ("") is
    # recorded under the '<key>_0' slot.
    self.assertEqual(
        DictUtils.match(dictionary, {'exp.data_dir': ''},
                        policy='strict', matches=matches),
        True)
    self.assertEqual(len(matches), 1)
    self.assertIn('exp.data_dir_0', matches)
    self.assertEqual(matches['exp.data_dir_0'], '')
def test_match_3(self):
    """dlbs -> TestDictUtils::test_match_3 [Testing matching helpers #3]"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
    matches = {}
    # The regexp splits 'bvlc_caffe' at the underscore; match() must record
    # the full match plus one entry per capture group (3 entries total).
    # NOTE: deprecated `assertEquals` alias (removed in Python 3.12) replaced
    # with `assertEqual`.
    self.assertEqual(
        DictUtils.match(dictionary, {'exp.framework': '([^_]+)_(.+)'},
                        policy='strict', matches=matches),
        True)
    self.assertEqual(len(matches), 3)
    self.assertEqual(matches['exp.framework_0'], 'bvlc_caffe')
    self.assertEqual(matches['exp.framework_1'], 'bvlc')
    self.assertEqual(matches['exp.framework_2'], 'caffe')
def test_match_2(self):
    """dlbs -> TestDictUtils::test_match_2 [Testing matching helpers #2]"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
    matches = {}
    # The regexp splits 'ResNet150' into name and number; match() must record
    # the full match plus one entry per capture group (3 entries total).
    # NOTE: deprecated `assertEquals` alias (removed in Python 3.12) replaced
    # with `assertEqual`.
    self.assertEqual(
        DictUtils.match(dictionary, {'exp.model': r'([^\d]+)(\d+)'},
                        policy='strict', matches=matches),
        True)
    self.assertEqual(len(matches), 3)
    self.assertEqual(matches['exp.model_0'], 'ResNet150')
    self.assertEqual(matches['exp.model_1'], 'ResNet')
    self.assertEqual(matches['exp.model_2'], '150')
def parse_log_files(filenames, opts=None):
    """ Parses files and returns their parameters.

    :param list filenames: List of file names to parse.
    :param dict opts: Dictionary of options.

    :rtype: tuple<list, list>
    :return: A tuple of two lists - succeeded and failed benchmarks
    """
    # Fill in defaults for any option the caller did not provide.
    opts = {} if opts is None else opts
    for key in ('filter_params', 'filter_query', 'output_params'):
        DictUtils.ensure_exists(opts, key)
    DictUtils.ensure_exists(opts, 'failed_benchmarks', 'discard')
    DictUtils.ensure_exists(opts, '_extended_params', {})
    DictUtils.ensure_exists(opts, 'ignore_errors', False)

    good_benchmarks = []
    bad_benchmarks = []
    for filename in filenames:
        # Parse log file
        params = LogParser.parse_log_file(
            filename, ignore_errors=opts['ignore_errors'])
        # Drop benchmarks that are empty or do not pass the user filters.
        dropped = len(params) == 0 or \
            not DictUtils.contains(params, opts['filter_params']) or \
            not DictUtils.match(params, opts['filter_query'])
        if dropped:
            continue
        # Add extended parameters and compute them
        if len(opts['_extended_params']) > 0:
            params.update(opts['_extended_params'])
            Processor().compute_variables([params])
        # A benchmark succeeded iff it reports a positive numeric throughput.
        throughput = params.get('results.throughput', None)
        succeeded = isinstance(throughput, (int, long, float)) and \
            throughput > 0
        # Get only those key/values that need to be serialized
        params = DictUtils.subdict(params, opts['output_params'])
        # Append benchmark either to succeeded or failed list
        if succeeded:
            good_benchmarks.append(params)
        elif opts['failed_benchmarks'] == 'keep':
            good_benchmarks.append(params)
        elif opts['failed_benchmarks'] == 'keep_separately':
            bad_benchmarks.append(params)
    return good_benchmarks, bad_benchmarks
def test_match_4(self):
    """dlbs -> TestDictUtils::test_match_4 [Testing matching helpers #4]"""
    dictionary = {'exp.framework': "bvlc_caffe", 'exp.model': "ResNet150"}
    # Same regexp as test_match_3, but without collecting match groups.
    # NOTE: deprecated `assertEquals` alias (removed in Python 3.12) replaced
    # with `assertEqual`.
    self.assertEqual(
        DictUtils.match(dictionary, {'exp.framework': '([^_]+)_(.+)'},
                        policy='strict'),
        True)
def test_match_1(self):
    """dlbs -> TestDictUtils::test_match_1 [Testing matching helpers #1]"""
    # NOTE: deprecated `assertEquals` alias (removed in Python 3.12) replaced
    # with `assertEqual` throughout.
    for frameworks in [
            self.framework, [self.framework], [self.framework, "Caffe2"]
    ]:
        # We can match against existing key with strict policy
        self.assertEqual(
            DictUtils.match(self.dictionary, {'exp.framework': frameworks},
                            policy='strict'),
            True)
        # We cannot match against non existing key with strict policy
        self.assertEqual(
            DictUtils.match(self.dictionary,
                            {'exp.framework_id': self.framework},
                            policy='strict'),
            False)
        # We can match against non existing key with relaxed policy
        self.assertEqual(
            DictUtils.match(self.dictionary,
                            {'exp.framework_id': self.framework},
                            policy='relaxed'),
            True)
        # Key exist, different values
        self.assertEqual(
            DictUtils.match(self.dictionary, {'exp.framework': 'Caffe2'},
                            policy='strict'),
            False)
        # AND condition + strict policy
        self.assertEqual(
            DictUtils.match(self.dictionary, {
                'exp.framework': self.framework,
                'exp.device_batch': self.device_batch
            }, policy='strict'),
            True)
        # AND condition
        self.assertEqual(
            DictUtils.match(self.dictionary, {
                'exp.framework': [self.framework, 'Caffe2'],
                'exp.device_batch': self.device_batch
            }, policy='strict'),
            True)
        self.assertEqual(
            DictUtils.match(self.dictionary, {
                'exp.framework': self.framework,
                'exp.device_batch': 2 * self.device_batch
            }, policy='strict'),
            False)
        # AND condition relaxed policy
        self.assertEqual(
            DictUtils.match(self.dictionary, {
                'exp.framework': self.framework,
                'exp.effective_batch': 2 * self.device_batch
            }, policy='relaxed'),
            True)
        # AND condition
        self.assertEqual(
            DictUtils.match(self.dictionary, {
                'exp.framework': [self.framework, 'Caffe2'],
                'exp.effective_batch': 2 * self.device_batch
            }, policy='relaxed'),
            True)
        # Relaxed policy with multiple fields that exist and do not match
        self.assertEqual(
            DictUtils.match(self.dictionary, {
                'exp.framework': self.framework,
                'exp.device_batch': 2 * self.device_batch
            }, policy='relaxed'),
            False)
def build(benchmarks, args):
    """Creates a JSON object that can be used to plot charts.

    :param list benchmarks: An array of benchmarks
    :param obj args: A result of argparse.parse. Contains parameters
                     defining the chart.
    """
    # `args.series` is a JSON-encoded list of filter dicts, one per series.
    series_filters = json.loads(args.series)
    # During pre-processing step, we store series as dictionaries mapping
    # X to Y. Then, we convert it into array.
    chart_data = {
        'ylabel': args.yparam,  # Benchmark parameter for Y-axis
        'xlabel': args.xparam,  # Benchmark parameter for X-axis
        'series': [],  # List of {'filters': dict(), 'data': dict()}
        'xvals': set()  # Possible values for X-axis
    }
    for series_filter in series_filters:
        chart_data['series'].append({
            'filters': series_filter,
            'data': defaultdict(list)
        })
    # Iterate over each benchmark and see if it needs to go into series
    for benchmark in benchmarks:
        # Without 'x' or 'y' data we cannot do anything.
        if args.xparam not in benchmark or args.yparam not in benchmark:
            continue
        # Iterate over series (their filters). A benchmark may contribute to
        # several series if it matches several filters.
        for idx, series_filter in enumerate(series_filters):
            # If we cannot match all keys from query, ignore it
            if not DictUtils.match(
                    benchmark, series_filter, policy='strict'):
                continue
            # X values are stringified so they can serve as dict keys.
            xval = str(benchmark[args.xparam])
            yval = benchmark[args.yparam]
            chart_data['series'][idx]['data'][xval].append(yval)
            chart_data['xvals'].add(xval)
    # Perform final aggregation: collapse the list of Y values collected for
    # each X point into a single number.
    reducers = {
        'min': min,
        'max': max,
        'avg': lambda arr: float(sum(arr)) / len(arr)
    }
    reducer = reducers[args.aggregation]
    baseline_xvalue_exists = True
    for series in chart_data['series']:
        # Reduce multiple matches
        for xval in series['data']:
            series['data'][xval] = reducer(series['data'][xval])
        # Check if normalization to a baseline X value is possible
        if args.baseline_xvalue and args.baseline_xvalue not in series[
                'data']:
            baseline_xvalue_exists = False
    # In-series normalization with respect to baseline value. It's performed
    # only when all series can be normalized.
    if args.baseline_xvalue and baseline_xvalue_exists:
        for series in chart_data['series']:
            baseline_val = series['data'][args.baseline_xvalue]
            for xval in series['data']:
                series['data'][xval] /= baseline_val
    # Normalization with respect to baseline series
    # NOTE(review): this truthiness test also skips normalization when
    # `args.baseline_series` is 0 (the first series) — verify whether the
    # argparse default uses 0 to mean "disabled" or whether this should be
    # `is not None`.
    if args.baseline_series:
        # We will normalize only when all values from other series can be scaled
        # i.e. baseline series must contain values for x points found in all other
        # series
        baseline_series_norm_ok = True
        baseline_series = chart_data['series'][
            args.baseline_series]['data'].copy()
        for idx, series in enumerate(chart_data['series']):
            if idx == args.baseline_series:
                continue
            if not baseline_series_norm_ok:
                break
            for xval in series['data']:
                if xval not in baseline_series:
                    baseline_series_norm_ok = False
                    break
        if baseline_series_norm_ok:
            # The baseline series ends up normalized to all-ones here.
            for series in chart_data['series']:
                for xval in series['data']:
                    series['data'][xval] = series['data'][
                        xval] / baseline_series[xval]
    # Return series info (sets are not JSON-serializable, hence the list()).
    chart_data['xvals'] = list(chart_data['xvals'])
    print(json.dumps(chart_data, indent=4))
    return chart_data
def dict_matcher(bench):
    # True when `bench` satisfies every condition in `query` (a variable
    # closed over from the enclosing scope — not visible in this fragment);
    # the 'strict' policy requires all queried keys to be present.
    return DictUtils.match(bench, query, policy='strict')
selector = dict_matcher
def apply_extensions(base_experiment, config):
    """ Apply extensions in *config* to experiment *base_experiment*.

    The algorithm looks like this. We start with a list containing only one
    experiment - *base_experiment*. Then, for each extension we try to extend
    all experiments in the list.

    :param dict base_experiment: Parameters of an experiment
    :param dict config: Configuration dictionary
    :return: List of experiments extended with extensions or list with
             `base_experiment`.
    """
    experiments = [copy.deepcopy(base_experiment)]
    for extension in config['extensions']:
        # Experiments produced by applying this extension to every experiment
        # accumulated so far (matching ones get extended, others are kept).
        active_experiments = []
        for experiment in experiments:
            # Unique per-(experiment, extension) id used to namespace the
            # temporary variables injected below.
            session_id = uuid.uuid4().__str__().replace('-', '')
            # Condition matches will indicate what was matched in the form "field_%d: value"
            # where %d is an integer number. 0 indicates entire match, other
            # indicates groups if present.
            # Now, condition may only be used when referenced parameter in
            # 'condition' section is a constant (does not depend on other parameters)
            Builder.assert_match_is_corrent(experiment, extension['condition'])
            matches = {}
            if not DictUtils.match(experiment,
                                   extension['condition'],
                                   policy='relaxed',
                                   matches=matches):
                # Not a match, keep unmodified version of this experiment
                active_experiments.append(copy.deepcopy(experiment))
            else:
                # Create base extended version using 'parameters' section
                # of an extension
                extension_experiment = copy.deepcopy(experiment)
                # Add condition matched variables in case they are referenced by parameters or cases
                for match_key in matches:
                    session_key = '__dlbs_%s_%s' % (session_id, match_key)
                    extension_experiment[session_key] = matches[match_key]
                # We need to update values in `extension["parameters"]` for
                # current session id
                extension_experiment.update(
                    Builder.correct_var_ref_in_extension(
                        session_id, extension['parameters']))
                if len(extension['cases']) == 0:
                    # No cases: the extension yields exactly one experiment.
                    active_experiments.append(extension_experiment)
                else:
                    # One extended experiment per case, each starting from a
                    # fresh copy of the base extended experiment.
                    for case in extension['cases']:
                        case_experiment = copy.deepcopy(
                            extension_experiment)
                        # We need to update values in `case` for current session id
                        case_experiment.update(
                            Builder.correct_var_ref_in_extension(
                                session_id, case))
                        active_experiments.append(case_experiment)
        experiments = active_experiments
    # Drop empty experiments (e.g. fully pruned by extension processing).
    experiments = [
        experiment for experiment in experiments if len(experiment) > 0
    ]
    return experiments