Example No. 1
def read_json_file(path, progress_bar=DUMMY_PROGRESS):
    # read the file as JSON; fall back to JSON Lines if plain JSON decoding fails
    with open(path, "r") as inputfile:
        try:
            json_data = json.load(inputfile)
        except JSONDecodeError as error:
            inputfile.seek(0)
            # read the lines once; iterating the file object in both checks
            # would leave the second check with an exhausted iterator
            stripped_lines = [line.strip() for line in inputfile]
            is_jsonlines = any(line.startswith('{') for line in stripped_lines) and \
                           all(line.startswith('{') or line == "" for line in stripped_lines)
            if is_jsonlines:
                # the file is actually JSON Lines; forward the caller's progress bar
                return read_jsonlines_file(path, progress_bar=progress_bar)
            else:
                raise FileFormatError(str(error)) from error

    # create an experiment object to store the data loaded from the file
    experiment = Experiment()

    if "callpaths" not in json_data:
        try:
            _read_new_json_file(experiment, json_data, progress_bar)
        except KeyError as err:
            raise FileFormatError(str(err)) from err
    else:
        try:
            _read_legacy_json_file(experiment, json_data, progress_bar)
        except KeyError as err:
            raise FileFormatError(str(err)) from err

    call_tree = create_call_tree(experiment.callpaths, progress_bar)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
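
A minimal usage sketch for the reader above; the module path and the input file name are assumptions, not taken from the example itself:

# usage sketch; the import below assumes Extra-P's fileio package layout
from extrap.fileio.json_file_reader import read_json_file

experiment = read_json_file("measurements.json")  # hypothetical input file
print(experiment.parameters)
print(experiment.metrics)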
Example No. 2
def read_talpas_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to store the data loaded from the file
    experiment = Experiment()

    complete_data = {}
    parameters = None

    progress_bar.total += os.path.getsize(path)
    # read talpas file into complete_data
    with open(path) as file:

        progress_bar.step('Reading file')
        for ln, line in enumerate(file, 1):  # 1-based line numbers for error messages
            progress_bar.update(len(line))
            if line.isspace():
                continue
            line = line.replace(';', ',')

            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error).replace(",", ";")}. Line: "{line}"'
                ) from error
            try:
                key = Callpath(data['callpath']), Metric(data['metric'])
                if parameters is None:
                    parameters = [
                        Parameter(p) for p in data['parameters'].keys()
                    ]
                coordinate = Coordinate(data['parameters'][p.name]
                                        for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key,
                                                    coordinate, data['value'],
                                                    progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"'
                ) from error

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment,
                                            progress_bar)

    if parameters is None:
        raise FileFormatError(f'File contains no data: "{path}"')
    for p in parameters:
        experiment.add_parameter(p)

    call_tree = create_call_tree(experiment.callpaths, progress_bar)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
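
The reader above expects one JSON-like object per line that uses ';' instead of ',' as the separator and carries the keys 'callpath', 'metric', 'parameters', and 'value'. A self-contained sketch of the per-line transformation; the sample line is invented for illustration:

import json

# hypothetical Talpas line: a JSON object written with ';' as the separator
line = '{"callpath": "main->compute"; "metric": "time"; "parameters": {"p": 4}; "value": 1.5}'
data = json.loads(line.replace(';', ','))  # same rewrite as in read_talpas_file
print(data['callpath'], data['metric'], data['parameters'], data['value'])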
Example No. 3
def read_cube_file(dir_name,
                   scaling_type,
                   pbar=DUMMY_PROGRESS,
                   selected_metrics=None):
    # collect the paths of the cube files inside the given directory
    path = Path(dir_name)
    if not path.is_dir():
        raise FileFormatError(
            f'Cube file path must point to a directory: {dir_name}')
    cubex_files = list(path.glob('*/[!.]*.cubex'))
    if not cubex_files:
        raise FileFormatError(f'No cube files were found in: {dir_name}')
    pbar.total += len(cubex_files) + 6
    # iterate over all folders and read the cube profiles in them
    experiment = Experiment()

    pbar.step("Reading cube files")
    parameter_names_initial = []
    parameter_names = []
    parameter_values = []
    parameter_dict = defaultdict(set)
    progress_step_size = 5 / len(cubex_files)
    for path_id, path in enumerate(cubex_files):
        pbar.update(progress_step_size)
        folder_name = path.parent.name
        logging.debug(f"Cube file: {path} Folder: {folder_name}")

        # extract the parameter part of the folder name: everything between
        # the first "." and the repetition suffix ".r<n>" (if present)
        par_start = folder_name.find(".") + 1
        par_end = folder_name.find(".r")
        par_end = None if par_end == -1 else par_end
        parameters = folder_name[par_start:par_end]

        # set scaling flag for experiment
        if path_id == 0 and scaling_type in ("weak", "strong"):
            experiment.scaling = scaling_type

        # split into alternating parameter names (even indices) and values (odd indices)
        param_list = re.split('([0-9.,]+)', parameters)
        # drop the empty string that re.split leaves at the boundary
        param_list.remove("")

        parameter_names = [n for i, n in enumerate(param_list) if i % 2 == 0]
        parameter_value = [
            float(n.replace(',', '.').rstrip('.'))
            for i, n in enumerate(param_list) if i % 2 == 1
        ]

        # check if parameter already exists
        if path_id == 0:
            parameter_names_initial = parameter_names
        elif parameter_names != parameter_names_initial:
            raise FileFormatError(
                f"Parameters must be the same and in the same order: {parameter_names} is not {parameter_names_initial}."
            )

        for n, v in zip(parameter_names, parameter_value):
            parameter_dict[n].add(v)
        parameter_values.append(parameter_value)

    # determine non-constant parameters and add them to experiment
    parameter_selection_mask = []
    for i, p in enumerate(parameter_names):
        if len(parameter_dict[p]) > 1:
            experiment.add_parameter(Parameter(p))
            parameter_selection_mask.append(i)

    # if there is more than one parameter, fall back to weak scaling,
    # since summing the values for strong scaling only works for a single parameter
    if scaling_type == 'strong' and len(experiment.parameters) > 1:
        warnings.warn(
            "Strong scaling only works for one parameter. Using weak scaling instead."
        )
        scaling_type = 'weak'
        experiment.scaling = scaling_type

    pbar.step("Reading cube files")

    show_warning_skipped_metrics = set()
    aggregated_values = defaultdict(list)

    # import data from cube files
    # optimize import memory usage by reordering files and grouping by coordinate
    num_points = 0
    reordered_files = sorted(zip(cubex_files, parameter_values),
                             key=itemgetter(1))
    for parameter_value, point_group in groupby(reordered_files,
                                                key=itemgetter(1)):
        num_points += 1
        # create coordinate
        coordinate = Coordinate(parameter_value[i]
                                for i in parameter_selection_mask)
        experiment.add_coordinate(coordinate)

        aggregated_values.clear()
        for path, _ in point_group:
            pbar.update()
            with CubexParser(str(path)) as parsed:
                callpaths = make_callpath_mapping(parsed.get_root_cnodes())
                # iterate over all metrics
                for cube_metric in parsed.get_metrics():
                    pbar.update(0)
                    # NOTE: here we could choose which metrics to extract
                    if selected_metrics and cube_metric.name not in selected_metrics:
                        continue
                    try:
                        metric_values = parsed.get_metric_values(
                            metric=cube_metric, cache=False)
                        # create the metrics
                        metric = Metric(cube_metric.name)

                        for cnode_id in metric_values.cnode_indices:
                            pbar.update(0)
                            cnode = parsed.get_cnode(cnode_id)
                            callpath = callpaths[cnode_id]
                            # NOTE: here we can use clustering algorithm to select only certain node level values
                            # create the measurements
                            cnode_values = metric_values.cnode_values(
                                cnode, convert_to_exclusive=True)

                            # for weak scaling keep every MPI process value
                            if scaling_type == "weak":
                                # do NOT use a generator here; it is slower
                                aggregated_values[(callpath, metric)].extend(
                                    map(float, cnode_values))

                            # for strong scaling sum over all MPI process values
                            elif scaling_type == "strong":
                                aggregated_values[(callpath, metric)].append(
                                    float(sum(cnode_values)))

                    # Take care of missing metrics
                    except MissingMetricError as e:
                        show_warning_skipped_metrics.add(e.metric.name)
                        logging.info(
                            f'The cubex file {Path(*path.parts[-2:])} does not contain data for the metric "{e.metric.name}"'
                        )

        # add measurements to experiment
        for (callpath, metric), values in aggregated_values.items():
            pbar.update(0)
            experiment.add_measurement(
                Measurement(coordinate, callpath, metric, values))

    pbar.step("Unify calltrees")
    to_delete = []
    # determine common callpaths for common calltree
    # add common callpaths and metrics to experiment
    for key, value in pbar(experiment.measurements.items(),
                           len(experiment.measurements),
                           scale=0.1):
        if len(value) < num_points:
            to_delete.append(key)
        else:
            (callpath, metric) = key
            experiment.add_callpath(callpath)
            experiment.add_metric(metric)
    for key in to_delete:
        pbar.update(0)
        del experiment.measurements[key]

    # determine calltree
    call_tree = io_helper.create_call_tree(experiment.callpaths,
                                           pbar,
                                           progress_scale=0.1)
    experiment.call_tree = call_tree

    if show_warning_skipped_metrics:
        warnings.warn(
            "The following metrics were skipped because they contained no data: "
            f"{', '.join(show_warning_skipped_metrics)}. For more details see log."
        )

    io_helper.validate_experiment(experiment, pbar)
    pbar.update()
    return experiment
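
The parameter names and values are recovered purely from the measurement folder names. A self-contained sketch of the same parsing steps; the folder name is invented and assumes the pattern '<prefix>.<parameters>.r<repetition>':

import re

folder_name = "blast.p64.n100.r2"  # hypothetical folder name
# cut out the parameter part between the first "." and the ".r<n>" suffix
par_end = folder_name.find(".r")
parameters = folder_name[folder_name.find(".") + 1:None if par_end == -1 else par_end]
# alternating names (even indices) and values (odd indices), as in read_cube_file
param_list = re.split('([0-9.,]+)', parameters)
param_list.remove("")
names = param_list[0::2]
values = [float(v.replace(',', '.').rstrip('.')) for v in param_list[1::2]]
print(names, values)  # -> ['p', 'n'] [64.0, 100.0]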
Example No. 4
def read_extrap3_experiment(path, progress_bar=DUMMY_PROGRESS):
    progress_bar.total += os.path.getsize(path)
    with open(path, "rb") as file:
        ioHelper = IoHelper(file)

        try:
            qualifier = ioHelper.readString()

            if qualifier != "EXTRAP_EXPERIMENT":
                raise FileFormatError(
                    "This is not an Extra-P 3 Experiment File. Qualifier was "
                    + str(qualifier))
        except struct.error as err:
            raise FileFormatError(
                "This is not an Extra-P 3 Experiment File.") from err

        try:
            exp = Experiment()
            id_mappings = _Mappings()
            versionNumber = ioHelper.readString()  # read to advance the stream; not used further
            prefix = ioHelper.readString()
            progress_bar.step('Load Extra-P 3 experiment')
            last_pos = 0

            is_sparse = __Ref(False)
            while prefix:
                pos = file.tell()
                progress_bar.update(pos - last_pos)
                last_pos = pos
                # logging.debug("Deserialize " + str(prefix))
                # noinspection PyNoneFunctionAssignment
                if prefix == 'Parameter':
                    p = deserialize_parameter(id_mappings, ioHelper)
                    exp.add_parameter(p)

                elif prefix == 'Metric':
                    m = deserialize_metric(ioHelper)
                    SAFE_RETURN_None(m)
                    exp.add_metric(m)

                elif prefix == 'Region':
                    deserialize_region(id_mappings, ioHelper)

                elif prefix == 'Callpath':
                    c = deserialize_callpath(id_mappings, ioHelper)
                    SAFE_RETURN_None(c)
                    exp.add_callpath(c)
                    progress_bar.total += 100

                elif prefix == 'Coordinate':
                    c = deserialize_coordinate(exp, id_mappings, ioHelper)
                    SAFE_RETURN_None(c)
                    exp.add_coordinate(c)

                elif prefix == 'ModelComment':
                    deserialize_modelcomment(ioHelper)
                    # SAFE_RETURN_None(comment)
                    # exp.addModelComment(comment)

                elif prefix == 'SingleParameterSimpleModelGenerator':
                    generator = deserialize_SingleParameterSimpleModelGenerator(
                        exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)

                elif prefix == 'SingleParameterRefiningModelGenerator':
                    generator = deserialize_SingleParameterModelGenerator(
                        exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)

                elif prefix == 'MultiParameterSimpleModelGenerator':
                    generator = deserialize_MultiParameterModelGenerator(
                        exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)

                elif prefix == 'MultiParameterSparseModelGenerator':
                    generator = deserialize_MultiParameterModelGenerator(
                        exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)

                elif prefix == 'ExperimentPoint':
                    point = deserialize_ExperimentPoint(
                        exp, id_mappings, ioHelper)
                    SAFE_RETURN_None(point)
                    exp.add_measurement(point)

                elif prefix == 'Model':
                    model, generator_id = deserialize_Model(
                        exp, id_mappings, is_sparse, ioHelper)
                    SAFE_RETURN_None(model)
                    exp.modelers[generator_id].models[(model.callpath,
                                                       model.metric)] = model

                else:
                    raise FileFormatError("Unknown object: " + prefix +
                                          ". Cannot load experiment.")

                prefix = ioHelper.readString()
        except struct.error as err:
            raise FileFormatError(str(err)) from err

        pos = file.tell()
        progress_bar.update(pos - last_pos)

        # remove empty modelers
        exp.modelers = [m for m in exp.modelers if len(m.models) > 0]
        # add measurements to model
        for modeler in exp.modelers:
            for key, model in modeler.models.items():
                model.measurements = exp.measurements.get(key)

        callpaths = exp.callpaths
        call_tree = io_helper.create_call_tree(callpaths,
                                               progress_bar,
                                               True,
                                               progress_scale=100)
        exp.call_tree = call_tree

        io_helper.validate_experiment(exp, progress_bar)
        return exp
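
A minimal usage sketch for the binary reader; the module path and the input file name are assumptions:

# usage sketch; the import below assumes Extra-P's fileio package layout
from extrap.fileio.extrap3_experiment_reader import read_extrap3_experiment

exp = read_extrap3_experiment("experiment.extrap")  # hypothetical Extra-P 3 file
for modeler in exp.modelers:
    print(len(modeler.models), "models loaded")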
Example No. 5
def read_text_file(path, progress_bar=DUMMY_PROGRESS):
    # read text file into list
    with open(path) as file:
        lines = file.readlines()

    # remove empty lines
    lines_no_space = [l for l in lines if not l.isspace()]

    # remove line breaks
    lines_no_space = [l.replace("\n", "") for l in lines_no_space]

    # create an experiment object to store the data loaded from the text file
    experiment = Experiment()

    # variables for parsing
    number_parameters = 0
    last_metric = None
    last_callpath = Callpath("")
    coordinate_id = 0

    if len(lines_no_space) == 0:
        raise FileFormatError(f'File contains no data: "{path}"')

    # parse text to extrap objects
    for i, line in enumerate(progress_bar(lines)):
        if line.isspace() or line.startswith('#'):
            continue  # allow comments
        line = re_whitespace.sub(' ', line)
        # get field name
        field_separator_idx = line.find(" ")
        field_name = line[:field_separator_idx]
        field_value = line[field_separator_idx + 1:].strip()

        if field_name == "METRIC":
            # create a new metric if not already exists
            metric_name = field_value
            test_metric = Metric(metric_name)
            if test_metric not in experiment.metrics:
                metric = test_metric
                experiment.add_metric(metric)
                last_metric = metric
            else:
                last_metric = metric
            # reset the coordinate id, since moving to a new region
            coordinate_id = 0

        elif field_name == "REGION":
            # create a new callpath for the region
            callpath_name = field_value

            callpath = Callpath(callpath_name)
            experiment.add_callpath(callpath)
            last_callpath = callpath

            # reset the coordinate id, since moving to a new region
            coordinate_id = 0

        elif field_name == "DATA":
            if last_metric is None:
                last_metric = Metric("")
            # create a new data set
            data_string = field_value
            data_list = data_string.split(" ")
            values = [float(d) for d in data_list]
            if 1 <= number_parameters <= 4:
                # create one measurement per repetition

                if coordinate_id >= len(experiment.coordinates):
                    raise FileFormatError(
                        f'Too many DATA lines ({coordinate_id}) for the number of POINTS '
                        f'({len(experiment.coordinates)}) in line {i}.')
                measurement = Measurement(
                    experiment.coordinates[coordinate_id], last_callpath,
                    last_metric, values)
                experiment.add_measurement(measurement)
                coordinate_id += 1
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")

        elif field_name == "PARAMETER":
            # create a new parameter
            parameters = field_value.split(' ')
            experiment.parameters += [Parameter(p) for p in parameters]
            number_parameters = len(experiment.parameters)

        elif field_name == "POINTS":
            coordinate_string = field_value.strip()
            if '(' in coordinate_string:
                coordinate_string = coordinate_string.replace(") (", ")(")
                coordinate_string = coordinate_string[1:-1]
                coordinate_strings = coordinate_string.split(')(')
            else:
                coordinate_strings = coordinate_string.split(' ')
            # create a new point
            if number_parameters == 1:
                coordinates = [
                    Coordinate(float(c)) for c in coordinate_strings
                ]
                experiment.coordinates.extend(coordinates)
            elif 1 < number_parameters < 5:
                for coordinate_string in coordinate_strings:
                    coordinate_string = coordinate_string.strip()
                    values = coordinate_string.split(" ")
                    coordinate = Coordinate(float(v) for v in values)
                    experiment.coordinates.append(coordinate)
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")
        else:
            raise FileFormatError(
                f'Encountered wrong field: "{field_name}" in line {i}: {line}')

    if last_metric == Metric(''):
        experiment.metrics.append(last_metric)
    if last_callpath == Callpath(''):
        experiment.callpaths.append(last_callpath)
    # create the call tree and add it to the experiment
    call_tree = create_call_tree(experiment.callpaths,
                                 progress_bar,
                                 progress_scale=10)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
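
The field names handled above imply a simple line-oriented input format. A hypothetical input that the parser accepts (all values invented), fed to the reader defined above; this assumes the reader and its dependencies are importable:

import tempfile

# one parameter, three points, and one DATA line per coordinate,
# each DATA line holding the repetitions for that point
text = """\
# comment lines are allowed
PARAMETER p
POINTS 4 8 16
METRIC time
REGION main
DATA 1.1 1.2 1.0
DATA 2.1 2.0 2.2
DATA 4.3 4.1 4.2
"""
with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as file:
    file.write(text)
experiment = read_text_file(file.name)
print(experiment.coordinates)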
Example No. 6
def read_jsonlines_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to store the data loaded from the file
    experiment = Experiment()

    complete_data = {}
    parameters = None
    default_callpath = Callpath('<root>')
    default_metric = Metric('<default>')

    progress_bar.total += os.path.getsize(path)

    # read jsonlines file into complete_data
    with open(path) as file:
        progress_bar.step('Reading file')
        for ln, line in enumerate(file, 1):  # 1-based line numbers for error messages
            progress_bar.update(len(line))
            if line.isspace():
                continue

            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error)}. Line: "{line}"'
                ) from error
            try:
                if 'callpath' in data:
                    callpath = Callpath(data['callpath'])
                else:
                    callpath = default_callpath

                if 'metric' in data:
                    metric = Metric(data['metric'])
                else:
                    metric = default_metric
                key = callpath, metric
                if parameters is None:  # ensures uniform order of parameters
                    parameters = [Parameter(p) for p in data['params'].keys()]
                coordinate = Coordinate(data['params'][p.name]
                                        for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key,
                                                    coordinate, data['value'],
                                                    progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"'
                ) from error

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment,
                                            progress_bar)

    if parameters is None:
        raise FileFormatError(f'File contains no data: "{path}"')
    for p in parameters:
        experiment.add_parameter(p)

    callpaths = experiment.callpaths
    experiment.call_tree = create_call_tree(callpaths, progress_bar)

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
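
Each input line is a standalone JSON object; 'params' and 'value' are required, while 'callpath' and 'metric' fall back to the defaults created above. A self-contained sketch with two invented lines:

import json

lines = [
    '{"params": {"p": 4}, "value": 0.25}',  # '<root>'/'<default>' defaults apply
    '{"callpath": "main", "metric": "time", "params": {"p": 8}, "value": 0.5}',
]
for line in lines:
    data = json.loads(line)
    print(data.get('callpath', '<root>'), data.get('metric', '<default>'),
          data['params'], data['value'])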