def parse_json_arguments(args):
    """Parse parameters, variables and extensions.

    Args:
        args (dict): Dictionary of command line arguments returned by `parse_arguments`. Is not modified.

    Returns:
        A tuple of (params, variables, extensions):
          - `params` is a dictionary of parameters (all params in args['P'])
          - `variables` is a dictionary of variables (all vars in args['V'])
          - `extensions` is a list of dictionaries (all extensions in args['E'])

    Raises:
        Exception: Re-raises whatever `json.loads` raises for a non-parsable extension.
    """
    # Make sure the expected keys exist so the lookups below never raise KeyError.
    for param in ('P', 'V', 'E'):
        DictUtils.ensure_exists(args, param, [])
    params, variables, extensions = ({}, {}, [])
    # -P/-V items are 'key=value' strings. The lazy '.+?' plus the lookahead
    # splits on the FIRST '=' only, so values themselves may contain '='.
    DictUtils.add(params,
                  args['P'],
                  pattern='(.+?(?=[=]))=(.+)',
                  must_match=True)
    DictUtils.add(variables,
                  args['V'],
                  pattern='(.+?(?=[=]))=(.+)',
                  must_match=True)
    for extension in args['E']:
        try:
            extensions.append(json.loads(extension))
        except Exception:
            # logging.warn is deprecated; use logging.warning. Bare `raise`
            # re-raises the original exception with its traceback intact.
            logging.warning("Found non-json parsable extension: %s", extension)
            raise
    return params, variables, extensions
    def parse_log_file(filename):
        """ Parses one log file.

        Parameters are defined in that file as key-value pairs. Values must be
        json parsable strings. Every key has a prefix and a suffix equal to ``__``
        (two underscores), for instance:

        * __exp.device_batch__= 16
        * __results.training_time__= 33.343

        Parameters are keys without prefixes and suffixes i.e. 'exp.device_batch'
        and 'results.training_time' are parameter names from above example.

        :param str filename: Name of a file to parse.
        :return: Dictionary with experiment parameters.
        :rtype: dict
        """
        params = {}
        # must_match=False: a log file contains arbitrary output besides the
        # __key__=value lines, so non-matching lines are silently skipped.
        with open(filename) as log_file:
            DictUtils.add(params,
                          log_file,
                          pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                          must_match=False)
        return params
Example #3
0
    def parse(inputs, recursive=False, ignore_errors=False):
        """Parse benchmark log files (*.log).

        Args:
            inputs: Path specifiers of where to search for log files. A string or a
                list of strings; each item is either a directory or a *.log file path.
            recursive (bool): If true, parse directories found in `inputs` recursively.
            ignore_errors (bool): If true, ignore errors associated with parsing parameter values.

        Returns:
            Instance of this class.
        """
        inputs = inputs if isinstance(inputs, list) else [inputs]
        # A set de-duplicates files that are specified more than once.
        log_files = set()
        for file_path in inputs:
            if os.path.isdir(file_path):
                # Bug fix: gather files from the CURRENT directory only. The
                # previous code passed the whole `inputs` list here, re-scanning
                # every input once per directory entry.
                log_files.update(IOUtils.gather_files([file_path], "*.log", recursive))
            elif file_path.endswith('.log'):
                log_files.add(file_path)
        log_files = list(log_files)
        benchmarks = []
        for log_file in log_files:
            parameters = {}
            with OpenFile(log_file, 'r') as logfile:
                # The 'must_match' must be set to false. It says that not
                # every line in a log file must match key-value pattern.
                DictUtils.add(
                    parameters,
                    logfile,
                    pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                    must_match=False,
                    ignore_errors=ignore_errors
                )
            benchmarks.append(parameters)
        return BenchData(benchmarks, create_copy=False)
Example #4
0
    def load(inputs, **kwargs):
        """Load benchmark data (parsed from log files) from a JSON file.

        A file name is a JSON file that contains object with 'data' field. This field
        is a list with dictionaries, each dictionary contains parameters for one benchmark:
        {"data":[{...}, {...}, {...}]}

        Also handles CSV files, compressed tarballs of *.log files, and falls back
        to parsing raw log files/directories via `BenchData.parse`.

        Args:
            inputs (str): File name of a JSON (*.json) or a compressed JSON (.json.gz) file.
                May also be a one-element list wrapping such a file name.

        Returns:
            Instance of this class.
        """
        # Case 1: a JSON file, possibly given as a single-element list.
        is_json_file = IOUtils.is_json_file(inputs)
        if not is_json_file and isinstance(inputs, list) and len(inputs) == 1:
            is_json_file = IOUtils.is_json_file(inputs[0])
            inputs = inputs[0] if is_json_file else inputs
        if is_json_file:
            benchmarks = IOUtils.read_json(inputs, check_extension=True)
            if 'data' not in benchmarks:
                benchmarks = {'data': []}
                print("[WARNING]: No benchmark data found in '{}'".format(
                    inputs))
            return BenchData(benchmarks['data'], create_copy=False)
        # Case 2: a CSV file, one row per benchmark.
        is_csv_file = IOUtils.is_csv_file(inputs)
        if not is_csv_file and isinstance(inputs, list) and len(inputs) == 1:
            is_csv_file = IOUtils.is_csv_file(inputs[0])
            inputs = inputs[0] if is_csv_file else inputs
        if is_csv_file:
            with OpenFile(inputs, 'r') as fobj:
                reader = csv.DictReader(fobj)
                benchmarks = list(reader)
            return BenchData(benchmarks, create_copy=False)
        # Case 3: a compressed tarball containing *.log files.
        is_compressed_tarball = IOUtils.is_compressed_tarball(inputs)
        if not is_compressed_tarball and isinstance(inputs,
                                                    list) and len(inputs) == 1:
            # Bug fix: this previously called IOUtils.is_json_file (copy-paste from
            # case 1), so single-element tarball inputs always fell through to
            # BenchData.parse below.
            is_compressed_tarball = IOUtils.is_compressed_tarball(inputs[0])
            inputs = inputs[0] if is_compressed_tarball else inputs
        if is_compressed_tarball:
            benchmarks = []
            with tarfile.open(inputs, "r:gz") as archive:
                for member in archive.getmembers():
                    if member.isfile() and member.name.endswith('.log'):
                        log_file = archive.extractfile(member)
                        if log_file is not None:
                            parameters = {}
                            DictUtils.add(
                                parameters,
                                log_file,
                                pattern=
                                '[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                                must_match=False,
                                ignore_errors=True)
                            benchmarks.append(parameters)
            return BenchData(benchmarks, create_copy=False)
        # Fallback: treat inputs as raw log files / directories.
        return BenchData.parse(inputs, **kwargs)
Example #5
0
    def parse_log_file(filename, ignore_errors=False):
        """ Parses one benchmark log file (possible compressed).

        A log file is a textual log file. This method can also parse compressed
        log files - files that have *.gz extension. One log file is associated with
        one benchmark.
        Parameters are defined in that file as key-value pairs. Values must be
        json parsable strings. Every key has a prefix and a suffix equal to ``__``
        (two underscores), for instance:

        * __exp.replica_batch__= 16
        * __results.training_time__= 33.343

        Parameters are keys without prefixes and suffixes i.e. 'exp.device_batch'
        and 'results.training_time' are parameter names from above example.
        Not every line must contain parsable parameters. Those that do not match
        key/value regular expression pattern are ignored.
        One parameter may present in a log file multiple times. Only the last value
        is returned.

        Args:
            filename (str): Name of a file to parse.
            ignore_errors (bool): If true, ignore parsing errors associated with parameter values.

        Returns:
            Dictionary with experiment parameters, for instance: {"exp.device_batch": 16, "exp.model": "resnet50"}
        """
        exp_params = {}
        # must_match=False: log files contain plenty of non-parameter output,
        # so lines that do not look like __key__=value are simply skipped.
        with OpenFile(filename, 'r') as fobj:
            DictUtils.add(exp_params,
                          fobj,
                          pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                          must_match=False,
                          ignore_errors=ignore_errors)
        return exp_params
Example #6
0
def main():
    """Post-process one benchmark log file for a given backend.

    Usage: logger.py BACKEND LOG_FILE. Parses __key__=value parameters from the
    log file, computes updates (e.g. a human-readable model title), and appends
    any updates back to the same log file.
    """
    if len(sys.argv) != 3:
        print("Usage: logger.py BACKEND LOG_FILE")
        # Use sys.exit: the `exit` builtin is injected by the `site` module and
        # is not guaranteed to exist in all run modes (e.g. `python -S`).
        sys.exit(1)
    backend = sys.argv[1]
    log_file = sys.argv[2]

    # We may need to iterate multiple times over log records, so, reading log files into
    # a list is a preferable way.
    with open(log_file) as records:
        log_records = [record.strip() for record in records]
    # Parse parameters
    params = {}
    DictUtils.add(params,
                  log_records,
                  pattern='[ \t]*__(.+?(?=__[ \t]*[=]))__[ \t]*=(.+)',
                  must_match=False,
                  ignore_errors=True)
    updates = {}

    # Perform common checks that do not depend on particular backend
    #   1. Check if we need to update a model title
    model = params.get('exp.model', '')
    if model != '' and params.get('exp.model_title', '') == '':
        updates['exp.model_title'] = MODEL_TITLES.get(model, model)

    # Perform checks that depend on a backend type.
    if backend == 'tf_cnn_benchmarks':
        TfCnnBenchmarksBackend.check(log_records, params, updates)

    # Update a log file if needed
    if updates:
        with open(log_file, "a") as file_obj:
            for param, value in updates.items():
                file_obj.write("__%s__=%s\n" % (param, json.dumps(value)))
    def init(self,
             init_logger=False,
             load_default_config=True,
             load_config=True):
        """Initializes experimenter.

        Builds the command line parser, parses arguments, configures logging,
        and loads default/user configuration plus -P/-V/-E overrides.

        :param bool init_logger: If True, initializes loggers
        :param bool load_default_config: If false, does not load standard configuration.
        :param bool load_config: If true, loads configuration specified on a command line
        """
        # Parse command line arguments
        parser = argparse.ArgumentParser()
        parser.add_argument(
            'action',
            type=str,
            help=
            'Action to perform. Valid actions: "print-config", "run", "build" and "analyze-plan".'
        )
        # NOTE: the backslash continuations below are INSIDE string literals, so
        # the help texts intentionally contain long runs of spaces.
        parser.add_argument('--config',
                            required=False,
                            type=str,
                            help='Configuration file (json) of an experiment.\
                                                                        Will override values from default configuration.'
                            )
        parser.add_argument('--plan',
                            required=False,
                            type=str,
                            help='Pre-built plan of an experiment (json).\
                                                                      If action is "build", a file name to write plan to.\
                                                                      If action is "run", a file name to read plan from.'
                            )
        parser.add_argument('--progress_file', '--progress-file', required=False, type=str, default=None,
                            help='A JSON file that experimenter will be updating on its progress.'\
                                 'If not present, no progress info will be available.'\
                                 'Put it somewhere in /dev/shm')
        # -P may be given multiple times (action='append'); each item is key=value.
        parser.add_argument(
            '-P',
            action='append',
            required=False,
            default=[],
            help='Parameters that override parameters in configuration file.\
                                                                                     For instance, -Pexp.phase=2. Values must be json parsable (json.loads()).'
        )
        # -V items drive experiment combinations; values are JSON (often lists).
        parser.add_argument(
            '-V',
            action='append',
            required=False,
            default=[],
            help=
            'Variables that override variables in configuration file in section "variables". \
                                                                                     These variables are used to generate different combinations of experiments.\
                                                                                     For instance: -Vexp.framework=\'["tensorflow", "caffe2"]\'.\
                                                                                     Values must be json parsable (json.loads()).'
        )
        parser.add_argument(
            '--log_level',
            '--log-level',
            required=False,
            default='info',
            help=
            'Python logging level. Valid values: "critical", "error", "warning", "info" and "debug"'
        )
        parser.add_argument('--discard_default_config',
                            '--discard-default-config',
                            required=False,
                            default=False,
                            action='store_true',
                            help='Do not load default configuration.')
        parser.add_argument(
            '--no_validation',
            '--no-validation',
            required=False,
            default=False,
            action='store_true',
            help='Do not perform config validation before running benchmarks.')
        parser.add_argument(
            '-E',
            action='append',
            required=False,
            default=[],
            help=
            'Extensions to add. Can be usefull to quickly customize experiments.\
                                                                                     Must be valid json parsable array element for "extension" array.'
        )
        args = parser.parse_args()

        # Map the textual level name ('info', 'debug', ...) to its numeric value.
        log_level = logging.getLevelName(args.log_level.upper())
        self.action = args.action
        self.config_file = args.config
        self.plan_file = args.plan
        self.validation = not args.no_validation
        self.__progress_file = args.progress_file

        # Initialize logger
        if init_logger:
            logging.debug("Initializing logger to level %s", args.log_level)
            root = logging.getLogger()
            root.setLevel(log_level)
            handler = logging.StreamHandler(sys.stdout)
            handler.setLevel(log_level)
            root.addHandler(handler)

        logging.debug("Parsing parameters on a command line")
        # 'key=value' strings: the lazy '.+?' with lookahead splits on the first
        # '=' only, so values themselves may contain '='.
        DictUtils.add(self.params,
                      args.P,
                      pattern='(.+?(?=[=]))=(.+)',
                      must_match=True)
        logging.debug("Parsing variables on a command line")
        DictUtils.add(self.variables,
                      args.V,
                      pattern='(.+?(?=[=]))=(.+)',
                      must_match=True)

        # Load default configuration
        if load_default_config and not args.discard_default_config:
            logging.debug("Loading default configuration")
            _, self.config, self.param_info = ConfigurationLoader.load(
                os.path.join(os.path.dirname(__file__), 'configs'))

        # Load configurations specified on a command line
        if load_config:
            logging.debug("Loading user configuration")
            self.load_configuration()

        # Add extensions from command line
        DictUtils.ensure_exists(self.config, 'extensions', [])
        if len(args.E) > 0:
            logging.debug("Parsing extensions on a command line")
        for extension in args.E:
            try:
                ext = json.loads(extension)
                logging.debug('Found extension: %s', str(ext))
                self.config['extensions'].append(ext)
            except Exception as err:
                # NOTE(review): logging.warn is deprecated in favor of
                # logging.warning; also a bare `raise` would be preferable here.
                logging.warn("Found non-json parsable extension: %s",
                             extension)
                raise err