Exemplo n.º 1
0
 def build_cache(self, summary_file, target_variable, query):
     """Load benchmark summaries from a JSON file and build lookup caches.

     Populates:
         self.cache:   dict mapping "<model>_<gpus>_<effective_batch>" to the
                       float value of `target_variable` for that experiment.
         self.nets:    sorted list of model titles seen.
         self.batches: sorted list of effective batch sizes (ints) seen.
         self.devices: list of GPU specifier strings, sorted by string length.

     Args:
         summary_file: Path to a JSON file with a top-level 'data' list of
             experiment dictionaries.
         target_variable: Key that must be present in an experiment for it
             to be cached (e.g. a results metric).
         query: Match specification passed to DictUtils.match with the
             'strict' policy; non-matching experiments are skipped.
     """
     with OpenFile(summary_file) as file_obj:
         summary = json.load(file_obj)
     self.cache = {}
     # Use the built-in set type: the Python 2 `sets.Set` class used by the
     # original code is deprecated and was removed in Python 3.
     self.nets = set()
     self.batches = set()
     self.devices = set()
     for experiment in summary['data']:
         if target_variable not in experiment:
             print("target variable not in experiment, skipping")
             continue
         if not DictUtils.match(experiment, query, policy='strict'):
             continue
         # batch is an effective batch here
         key = '{0}_{1}_{2}'.format(
             experiment['exp.model_title'],
             experiment['exp.gpus'],
             experiment['exp.effective_batch']
         )
         self.cache[key] = float(experiment[target_variable])
         self.nets.add(experiment['exp.model_title'])
         self.batches.add(int(experiment['exp.effective_batch']))
         self.devices.add(str(experiment['exp.gpus']))
     # `sorted` accepts any iterable, so the intermediate list() is not needed.
     self.nets = sorted(self.nets)
     self.batches = sorted(self.batches)
     self.devices = sorted(self.devices, key=len)
Exemplo n.º 2
0
    def parse(inputs, recursive=False, ignore_errors=False):
        """Parse benchmark log files (*.log).

        Args:
            inputs: Path specifiers of where to search for log files. Either a
                single path (str) or a list of paths; each entry may be a
                directory (searched for *.log files) or a *.log file.
            recursive (bool): If true, parse directories found in `inputs` recursively.
            ignore_errors (bool): If true, ignore errors associated with parsing parameter values.

        Returns:
            Instance of this class.
        """
        inputs = inputs if isinstance(inputs, list) else [inputs]
        # A set de-duplicates paths that appear more than once in `inputs`.
        log_files = set()
        for file_path in inputs:
            if os.path.isdir(file_path):
                # BUG FIX: gather files under THIS directory only. The original
                # passed the whole `inputs` list here, re-scanning every input
                # once per directory encountered.
                log_files.update(
                    IOUtils.gather_files([file_path], "*.log", recursive))
            elif file_path.endswith('.log'):
                log_files.add(file_path)
        log_files = list(log_files)
        benchmarks = []
        for log_file in log_files:
            # One log file describes one benchmark as __key__=value lines.
            parameters = {}
            with OpenFile(log_file, 'r') as logfile:
                # The 'must_match' must be set to false. It says that not
                # every line in a log file must match key-value pattern.
                DictUtils.add(
                    parameters,
                    logfile,
                    pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                    must_match=False,
                    ignore_errors=ignore_errors
                )
            benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
Exemplo n.º 3
0
    def load(inputs, **kwargs):
        """Load benchmark data (parsed from log files) from a JSON file.

        A file name is a JSON file that contains object with 'data' field. This field
        is a list with dictionaries, each dictionary contains parameters for one benchmark:
        {"data":[{...}, {...}, {...}]}

        Also accepts CSV files (one row per benchmark) and gzip-compressed
        tarballs of *.log files. A single-element list is unwrapped to its
        only item. Anything else is forwarded to `BenchData.parse`.

        Args:
            inputs (str): File name of a JSON (*.json) or a compressed JSON (.json.gz) file.

        Returns:
            Instance of this class.
        """
        # --- JSON input: {"data": [{...}, ...]} ---
        is_json_file = IOUtils.is_json_file(inputs)
        if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
            is_json_file = IOUtils.is_json_file(inputs[0])
            inputs = inputs[0] if is_json_file else inputs
        if is_json_file:
            benchmarks = IOUtils.read_json(inputs, check_extension=True)
            if 'data' not in benchmarks:
                benchmarks = {'data': []}
                print("[WARNING]: No benchmark data found in '{}'".format(
                    inputs))
            return BenchData(benchmarks['data'], create_copy=False)
        # --- CSV input: one benchmark per row ---
        is_csv_file = IOUtils.is_csv_file(inputs)
        if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
            is_csv_file = IOUtils.is_csv_file(inputs[0])
            inputs = inputs[0] if is_csv_file else inputs
        if is_csv_file:
            with OpenFile(inputs, 'r') as fobj:
                reader = csv.DictReader(fobj)
                benchmarks = list(reader)
            return BenchData(benchmarks, create_copy=False)
        # --- Compressed tarball of *.log files ---
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
        if not is_compressed_tarball and isinstance(inputs,
                                                    list) and len(inputs) == 1:
            # BUG FIX: the original tested IOUtils.is_json_file here (copy-paste
            # from the JSON branch), so a one-element list holding a tarball
            # path was never recognized as a tarball.
            is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
            inputs = inputs[0] if is_compressed_tarball else inputs
        if is_compressed_tarball:
            benchmarks = []
            with tarfile.open(inputs, "r:gz") as archive:
                for member in archive.getmembers():
                    if member.isfile() and member.name.endswith('.log'):
                        log_file = archive.extractfile(member)
                        if log_file is not None:
                            # Same __key__=value extraction as BenchData.parse.
                            parameters = {}
                            DictUtils.add(
                                parameters,
                                log_file,
                                pattern=
                                '[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                                must_match=False,
                                ignore_errors=True)
                            benchmarks.append(parameters)
            return BenchData(benchmarks, create_copy=False)
        # --- Fallback: treat inputs as log files / directories ---
        return BenchData.parse(inputs, **kwargs)
Exemplo n.º 4
0
    def parse_log_file(filename, ignore_errors=False):
        """Extract benchmark parameters from a single (possibly gzipped) log file.

        The file is plain text; parameters appear as key-value lines where the
        key is wrapped in double underscores and values are JSON-parsable
        strings, for example:

        * __exp.replica_batch__= 16
        * __results.training_time__= 33.343

        The returned parameter names have the ``__`` wrappers stripped, e.g.
        'exp.replica_batch' and 'results.training_time' for the lines above.
        Lines that do not look like key-value pairs are silently skipped, and
        when a key occurs several times only its last value survives.

        Args:
            filename (str): Name of a file to parse (a *.gz file is handled
                transparently).
            ignore_errors (bool): If true, ignore parsing errors associated
                with parameter values.

        Returns:
            Dictionary with experiment parameters, for instance:
            {"exp.device_batch": 16, "exp.model": "resnet50"}
        """
        # Key/value extraction pattern; the lookahead stops the key at the
        # closing double underscore before '='.
        kv_pattern = '[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)'
        params = {}
        with OpenFile(filename, 'r') as source:
            # must_match=False: a log file may freely mix parameter lines
            # with arbitrary output, so non-matching lines are not an error.
            DictUtils.add(params,
                          source,
                          pattern=kv_pattern,
                          must_match=False,
                          ignore_errors=ignore_errors)
        return params