Example #1
 def test_read_12(self):
     experiment = read_json_file("data/json/input_12.JSON")
     self.assertListEqual(experiment.parameters,
                          [Parameter('x'), Parameter('y')])
     self.assertListEqual(experiment.coordinates, [
         Coordinate(4.0, 10.0),
         Coordinate(4.0, 20.0),
         Coordinate(4.0, 30.0),
         Coordinate(4.0, 40.0),
         Coordinate(4.0, 50.0),
         Coordinate(8.0, 10.0),
         Coordinate(8.0, 20.0),
         Coordinate(8.0, 30.0),
         Coordinate(8.0, 40.0),
         Coordinate(8.0, 50.0),
         Coordinate(16.0, 10.0),
         Coordinate(16.0, 20.0),
         Coordinate(16.0, 30.0),
         Coordinate(16.0, 40.0),
         Coordinate(16.0, 50.0),
         Coordinate(32.0, 10.0),
         Coordinate(32.0, 20.0),
         Coordinate(32.0, 30.0),
         Coordinate(32.0, 40.0),
         Coordinate(32.0, 50.0),
         Coordinate(64.0, 10.0),
         Coordinate(64.0, 20.0),
         Coordinate(64.0, 30.0),
         Coordinate(64.0, 40.0),
         Coordinate(64.0, 50.0)
     ])
Example #2
 def test_read_5(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_json_file("data/json/input_5.JSON")
     self.assertListEqual(experiment.parameters,
                          [Parameter('x'), Parameter('y')])
     self.assertListEqual([p.id for p in experiment.parameters], [0, 1])
     self.assertListEqual(experiment.coordinates, [
         Coordinate(4.0, 10.0),
         Coordinate(4.0, 20.0),
         Coordinate(4.0, 30.0),
         Coordinate(4.0, 40.0),
         Coordinate(4.0, 50.0),
         Coordinate(8.0, 10.0),
         Coordinate(8.0, 20.0),
         Coordinate(8.0, 30.0),
         Coordinate(8.0, 40.0),
         Coordinate(8.0, 50.0),
         Coordinate(16.0, 10.0),
         Coordinate(16.0, 20.0),
         Coordinate(16.0, 30.0),
         Coordinate(16.0, 40.0),
         Coordinate(16.0, 50.0),
         Coordinate(32.0, 10.0),
         Coordinate(32.0, 20.0),
         Coordinate(32.0, 30.0),
         Coordinate(32.0, 40.0),
         Coordinate(32.0, 50.0),
         Coordinate(64.0, 10.0),
         Coordinate(64.0, 20.0),
         Coordinate(64.0, 30.0),
         Coordinate(64.0, 40.0),
         Coordinate(64.0, 50.0)
     ])
Example #3
 def test_extrap3_multiparameter_experiment(self):
     experiment = read_extrap3_experiment('data/input/experiment_3_mp')
     self.assertListEqual([Parameter('x'), Parameter('y'), Parameter('z')], experiment.parameters)
     self.assertSetEqual({Coordinate(1, 1, 1), Coordinate(1, 1, 10), Coordinate(1, 1, 25),
                          Coordinate(1, 10, 1), Coordinate(1, 10, 10), Coordinate(1, 10, 25),
                          Coordinate(1, 25, 1), Coordinate(1, 25, 10), Coordinate(1, 25, 25),
                          Coordinate(10, 1, 1), Coordinate(10, 1, 10), Coordinate(10, 1, 25),
                          Coordinate(10, 10, 1), Coordinate(10, 10, 10), Coordinate(10, 10, 25),
                          Coordinate(10, 25, 1), Coordinate(10, 25, 10), Coordinate(10, 25, 25),
                          Coordinate(25, 1, 1), Coordinate(25, 1, 10), Coordinate(25, 1, 25),
                          Coordinate(25, 10, 1), Coordinate(25, 10, 10), Coordinate(25, 10, 25),
                          Coordinate(25, 25, 1), Coordinate(25, 25, 10), Coordinate(25, 25, 25)
                          }, set(experiment.coordinates))
     self.assertSetEqual({Callpath('main'), Callpath('main->init_mat'), Callpath('main->zero_mat'),
                          Callpath('main->mat_mul')}, set(experiment.callpaths))
     call_tree = CallTree()
     main = Node('main', Callpath('main'))
     call_tree.add_child_node(main)
     init_mat = Node('init_mat', Callpath('main->init_mat'))
     main.add_child_node(init_mat)
     zero_mat = Node('zero_mat', Callpath('main->zero_mat'))
     main.add_child_node(zero_mat)
     mat_mul = Node('mat_mul', Callpath('main->mat_mul'))
     main.add_child_node(mat_mul)
     self.assertEqual(call_tree, experiment.call_tree)
Example #4
 def test_read_1_json(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_json_file("data/jsonlines/test1.jsonl")
     self.assertListEqual([Parameter('x'), Parameter('y')],
                          experiment.parameters)
     self.assertListEqual([0, 1], [p.id for p in experiment.parameters])
     self.assertListEqual([
         Coordinate(x, y) for x in range(1, 5 + 1) for y in range(1, 5 + 1)
     ], experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #5
 def test_read_2(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_jsonlines_file("data/jsonlines/test2.jsonl")
     self.assertListEqual([Parameter('p'), Parameter('n')],
                          experiment.parameters)
     self.assertListEqual([0, 1], [p.id for p in experiment.parameters])
     self.assertListEqual([
         Coordinate(x, y) for x in [16, 32, 64, 128, 256]
         for y in [100, 200, 300, 400, 500]
     ], experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #6
 def test_matrix3p(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_jsonlines_file("data/jsonlines/matrix_3p.jsonl")
     self.assertListEqual(
         [Parameter('x'), Parameter('y'),
          Parameter('z')], experiment.parameters)
     self.assertListEqual([0, 1, 2], [p.id for p in experiment.parameters])
     self.assertListEqual([Coordinate(x, 1, 1) for x in range(1, 5 + 1)] +
                          [Coordinate(1, x, 1) for x in range(2, 5 + 1)] +
                          [Coordinate(1, 1, x) for x in range(2, 5 + 1)],
                          experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #7
 def test_single_parameter(self):
     experiment = read_cube_file('data/cubeset/single_parameter', 'weak')
     self.assertListEqual([Parameter('x')], experiment.parameters)
     self.assertSetEqual(
         {
             Coordinate(1),
             Coordinate(10),
             Coordinate(25),
             Coordinate(50),
             Coordinate(100),
             Coordinate(250),
             Coordinate(500),
             Coordinate(1000),
             Coordinate(2000)
         }, set(experiment.coordinates))
     self.assertSetEqual(
         {
             Callpath('main'),
             Callpath('main->init_mat'),
             Callpath('main->zero_mat'),
             Callpath('main->mat_mul')
         }, set(experiment.callpaths))
     self.assertSetEqual(
         {
             Metric('visits'),
             Metric('time'),
             Metric('min_time'),
             Metric('max_time'),
             Metric('PAPI_FP_OPS'),
             Metric('PAPI_L3_TCM'),
             Metric('PAPI_L2_TCM')
         }, set(experiment.metrics))
     read_cube_file('data/cubeset/single_parameter', 'strong')
Example #8
 def test_read_2(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_json_file("data/json/new/input2.json")
     x = Parameter('x')
     y = Parameter('y')
     self.assertListEqual(experiment.parameters, [x, y])
     self.assertListEqual(experiment.coordinates, [
         Coordinate(4, 10),
         Coordinate(8, 20),
         Coordinate(16, 30),
         Coordinate(32, 40),
         Coordinate(64, 50)
     ])
     self.assertListEqual(experiment.metrics,
                          [Metric('time'), Metric('visits')])
     self.assertListEqual(
         experiment.callpaths,
         [Callpath('sweep'), Callpath('sweep2')])
Example #9
def read_talpas_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to save the data loaded from the file
    experiment = Experiment()

    complete_data = {}
    parameters = None

    progress_bar.total += os.path.getsize(path)
    # read talpas file into complete_data
    with open(path) as file:

        progress_bar.step('Reading file')
        for ln, line in enumerate(file):
            progress_bar.update(len(line))
            if line.isspace():
                continue
            line = line.replace(';', ',')

            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error).replace(",", ";")}. Line: "{line}"'
                )
            try:
                key = Callpath(data['callpath']), Metric(data['metric'])
                if parameters is None:
                    parameters = [
                        Parameter(p) for p in data['parameters'].keys()
                    ]
                coordinate = Coordinate(data['parameters'][p.name]
                                        for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key,
                                                    coordinate, data['value'],
                                                    progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"'
                )

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment,
                                            progress_bar)

    for p in parameters:
        experiment.add_parameter(p)

    call_tree = create_call_tree(experiment.callpaths, progress_bar)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
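
The semicolon-to-comma substitution above implies that every line of a TALPAS file is a JSON object whose separators are written as semicolons. A hypothetical input line, inferred from the keys the parser reads ('callpath', 'metric', 'parameters', 'value'):

{"callpath": "main->compute"; "metric": "time"; "parameters": {"p": 16; "n": 100}; "value": 12.5}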
Example #10
    def create_default_building_blocks(allow_log_terms,
                                       allow_negative_exponents=False):
        """
        Creates the default building blocks for the single parameter hypothesis
        that will be used during the search for the best hypothesis.
        """

        if allow_log_terms:
            exponents = [(0, 1, 1), (0, 1, 2), (1, 4, 0), (1, 3, 0), (1, 4, 1),
                         (1, 3, 1), (1, 4, 2), (1, 3, 2), (1, 2, 0), (1, 2, 1),
                         (1, 2, 2), (2, 3, 0), (3, 4, 0), (2, 3, 1), (3, 4, 1),
                         (4, 5, 0), (2, 3, 2), (3, 4, 2), (1, 1, 0), (1, 1, 1),
                         (1, 1, 2), (5, 4, 0), (5, 4, 1), (4, 3, 0), (4, 3, 1),
                         (3, 2, 0), (3, 2, 1), (3, 2, 2), (5, 3, 0), (7, 4, 0),
                         (2, 1, 0), (2, 1, 1), (2, 1, 2), (9, 4, 0), (7, 3, 0),
                         (5, 2, 0), (5, 2, 1), (5, 2, 2), (8, 3, 0),
                         (11, 4, 0), (3, 1, 0), (3, 1, 1)]
            # These were used for relearn
            if allow_negative_exponents:
                exponents += [
                    (-0, 1, -1), (-0, 1, -2), (-1, 4, -1), (-1, 3, -1),
                    (-1, 4, -2), (-1, 3, -2), (-1, 2, -1), (-1, 2, -2),
                    (-2, 3, -1), (-3, 4, -1), (-2, 3, -2), (-3, 4, -2),
                    (-1, 1, -1), (-1, 1, -2), (-5, 4, -1), (-4, 3, -1),
                    (-3, 2, -1), (-3, 2, -2), (-2, 1, -1), (-2, 1, -2),
                    (-5, 2, -1), (-5, 2, -2), (-3, 1, -1)
                ]

        else:
            exponents = [(1, 4, 0), (1, 3, 0), (1, 2, 0), (2, 3, 0), (3, 4, 0),
                         (4, 5, 0), (1, 1, 0), (5, 4, 0), (4, 3, 0), (3, 2, 0),
                         (5, 3, 0), (7, 4, 0), (2, 1, 0), (9, 4, 0), (7, 3, 0),
                         (5, 2, 0), (8, 3, 0), (11, 4, 0), (3, 1, 0)]
            # These were used for relearn
            if allow_negative_exponents:
                exponents += [(-1, 4, 0), (-1, 3, 0), (-1, 2, 0), (-2, 3, 0),
                              (-3, 4, 0), (-4, 5, 0), (-1, 1, 0), (-5, 4, 0),
                              (-4, 3, 0), (-3, 2, 0), (-5, 3, 0), (-7, 4, 0),
                              (-2, 1, 0), (-9, 4, 0), (-7, 3, 0), (-5, 2, 0),
                              (-8, 3, 0), (-11, 4, 0), (-3, 1, 0)]

        hypotheses_building_blocks = [
            CompoundTerm.create(*e) for e in exponents
        ]
        # print the hypothesis building blocks, compound terms in debug mode
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            parameter = Parameter('p')
            for i, compound_term in enumerate(hypotheses_building_blocks):
                logging.debug(
                    f"Compound term {i}: {compound_term.to_string(parameter)}")

        return hypotheses_building_blocks
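
A minimal sketch of what the exponent triples encode, assuming Extra-P's convention that CompoundTerm.create(a, b, c) builds the single-parameter term p^(a/b) * log2(p)^c, so (1, 2, 1) stands for sqrt(p) * log2(p); the import paths are assumed from Extra-P 4.x:

from extrap.entities.terms import CompoundTerm
from extrap.entities.parameter import Parameter

# (a, b, c) -> p^(a/b) * log2(p)^c (assumed encoding)
term = CompoundTerm.create(1, 2, 1)
print(term.to_string(Parameter('p')))  # expected to render roughly as p^(1/2) * log2(p)^(1)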
Example #11
def deserialize_coordinate(exp, id_mapping, ioHelper):
    id = ioHelper.readId()
    length = ioHelper.readInt()

    coordinate_parts = [None] * length
    for i in range(length):
        param = Parameter(ioHelper.readString())
        paramIdx = exp.parameters.index(param)
        val = ioHelper.readValue()
        coordinate_parts[paramIdx] = val

    coordinate = Coordinate(*coordinate_parts)
    id_mapping.coordinate_mapping[id] = coordinate
    return coordinate
Example #12
 def test_read_1(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_json_file("data/json/input_1.JSON")
     self.assertListEqual(experiment.parameters, [Parameter('x')])
     self.assertListEqual([p.id for p in experiment.parameters], [0])
     self.assertListEqual(experiment.coordinates, [
         Coordinate(4),
         Coordinate(8),
         Coordinate(16),
         Coordinate(32),
         Coordinate(64)
     ])
     self.assertListEqual(experiment.metrics, [Metric('time')])
     self.assertListEqual(experiment.callpaths, [Callpath('sweep')])
Example #13
 def test_sparse_matrix2p(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_jsonlines_file(
         "data/jsonlines/sparse_matrix_2p.jsonl")
     self.assertListEqual([Parameter('x'), Parameter('y')],
                          experiment.parameters)
     self.assertListEqual([0, 1], [p.id for p in experiment.parameters])
     self.assertListEqual([
         Coordinate(20, 1),
         Coordinate(30, 1),
         Coordinate(30, 2),
         Coordinate(40, 1),
         Coordinate(40, 2),
         Coordinate(40, 3),
         Coordinate(50, 1),
         Coordinate(50, 2),
         Coordinate(50, 3),
         Coordinate(50, 4),
         Coordinate(60, 1),
         Coordinate(60, 2),
         Coordinate(60, 3),
         Coordinate(60, 4),
         Coordinate(60, 5),
         Coordinate(70, 2),
         Coordinate(70, 3),
         Coordinate(70, 4),
         Coordinate(70, 5),
         Coordinate(80, 3),
         Coordinate(80, 4),
         Coordinate(80, 5),
         Coordinate(90, 4),
         Coordinate(90, 5),
         Coordinate(100, 5)
     ], experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #14
def _read_new_json_file(experiment, json_data, progress_bar):
    parameter_data = json_data["parameters"]
    for p in parameter_data:
        parameter = Parameter(p)
        experiment.add_parameter(parameter)

    measurements_data = json_data["measurements"]
    for callpath_name, data in progress_bar(measurements_data.items()):
        for metric_name, measurements in data.items():
            for measurement_data in measurements:  # avoid shadowing the Measurement built below
                coordinate = Coordinate(measurement_data['point'])
                experiment.add_coordinate(coordinate)
                callpath = Callpath(callpath_name)
                experiment.add_callpath(callpath)
                metric = Metric(metric_name)
                experiment.add_metric(metric)
                measurement = Measurement(coordinate, callpath, metric,
                                          measurement_data['values'])
                experiment.add_measurement(measurement)
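
The keys accessed above suggest that the "new" JSON layout lists the parameter names once and groups measurements by call path and metric, each entry carrying a 'point' and its repeated 'values'. A hypothetical minimal document:

{
    "parameters": ["x", "y"],
    "measurements": {
        "sweep": {
            "time": [
                {"point": [4, 10], "values": [1.0, 1.1, 0.9]},
                {"point": [8, 20], "values": [2.0, 2.1, 1.9]}
            ]
        }
    }
}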
Example #15
    def find_best_hypothesis(self, candidate_hypotheses: Iterable[SH], constant_cost: float,
                             measurements: Sequence[Measurement], current_best: H = MAX_HYPOTHESIS) -> Union[SH, H]:
        """
        Searches for the best single parameter hypothesis and returns it.
        """

        # currently the constant hypothesis is the best hypothesis
        best_hypothesis = current_best

        # search for the best hypothesis over all functions that can be built with the basic building blocks

        for i, next_hypothesis in enumerate(candidate_hypotheses):

            if self.use_crossvalidation:
                # use leave one out crossvalidation
                # cycle through points and leave one out per iteration
                for element_id in range(len(measurements)):
                    # copy measurements to create the training sets
                    training_measurements = list(measurements)

                    # remove one element from the set
                    training_measurements.pop(element_id)

                    # validation set
                    validation_measurement = measurements[element_id]

                    # compute the model coefficients based on the training data
                    next_hypothesis.compute_coefficients(training_measurements)

                    # check if the constant coefficient should actually be 0
                    next_hypothesis.clean_constant_coefficient(self.epsilon, training_measurements)

                    # compute the cost of the single-parameter model for the validation data
                    next_hypothesis.compute_cost(training_measurements, validation_measurement)

                # compute the model coefficients using all data
                next_hypothesis.compute_coefficients(measurements)
                logging.debug(f"single-parameter model {i}: " + next_hypothesis.function.to_string(Parameter('p')))
            else:
                # compute the model coefficients based on the training data
                next_hypothesis.compute_coefficients(measurements)

                # check if the constant coefficient should actually be 0
                next_hypothesis.clean_constant_coefficient(
                    self.CLEAN_CONSTANT_EPSILON, measurements)

                # compute the cost of the single-parameter model over all points
                next_hypothesis.compute_cost_all_points(measurements)

            # compute the AR2 for the hypothesis
            next_hypothesis.compute_adjusted_rsquared(constant_cost, measurements)

            # check if hypothesis is valid
            if not next_hypothesis.is_valid():
                logging.info(
                    "Numeric imprecision found. Model is invalid and will be ignored.")

            # compare the new hypothesis with the best hypothesis
            elif self.compare_hypotheses(best_hypothesis, next_hypothesis, measurements):
                best_hypothesis = next_hypothesis

        return best_hypothesis
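
The cross-validation branch above follows the classic leave-one-out pattern. A minimal, self-contained sketch of that pattern with hypothetical fit/cost callables (not Extra-P's API):

def leave_one_out_cost(points, fit, cost):
    total = 0.0
    for i in range(len(points)):
        training = points[:i] + points[i + 1:]  # hold out point i
        model = fit(training)                   # fit on the remaining points
        total += cost(model, points[i])         # score on the held-out point
    return total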
Example #16
 def test_multi_parameter(self):
     experiment = read_cube_file('data/cubeset/multi_parameter', 'weak')
     self.assertListEqual(
         [Parameter('x'), Parameter('y'),
          Parameter('z')], experiment.parameters)
     self.assertSetEqual(
         {
             Coordinate(1, 1, 1),
             Coordinate(1, 1, 10),
             Coordinate(1, 1, 25),
             Coordinate(1, 10, 1),
             Coordinate(1, 10, 10),
             Coordinate(1, 10, 25),
             Coordinate(1, 25, 1),
             Coordinate(1, 25, 10),
             Coordinate(1, 25, 25),
             Coordinate(10, 1, 1),
             Coordinate(10, 1, 10),
             Coordinate(10, 1, 25),
             Coordinate(10, 10, 1),
             Coordinate(10, 10, 10),
             Coordinate(10, 10, 25),
             Coordinate(10, 25, 1),
             Coordinate(10, 25, 10),
             Coordinate(10, 25, 25),
             Coordinate(25, 1, 1),
             Coordinate(25, 1, 10),
             Coordinate(25, 1, 25),
             Coordinate(25, 10, 1),
             Coordinate(25, 10, 10),
             Coordinate(25, 10, 25),
             Coordinate(25, 25, 1),
             Coordinate(25, 25, 10),
             Coordinate(25, 25, 25)
         }, set(experiment.coordinates))
     self.assertSetEqual(
         {
             Callpath('main'),
             Callpath('main->init_mat'),
             Callpath('main->zero_mat'),
             Callpath('main->mat_mul')
         }, set(experiment.callpaths))
     call_tree = CallTree()
     main = Node('main', Callpath('main'))
     call_tree.add_child_node(main)
     init_mat = Node('init_mat', Callpath('main->init_mat'))
     main.add_child_node(init_mat)
     zero_mat = Node('zero_mat', Callpath('main->zero_mat'))
     main.add_child_node(zero_mat)
     mat_mul = Node('mat_mul', Callpath('main->mat_mul'))
     main.add_child_node(mat_mul)
     self.assertEqual(call_tree, experiment.call_tree)
     self.assertSetEqual(
         {
             Metric('visits'),
             Metric('time'),
             Metric('min_time'),
             Metric('max_time'),
             Metric('PAPI_FP_OPS'),
             Metric('PAPI_L3_TCM'),
             Metric('PAPI_L2_TCM')
         }, set(experiment.metrics))
     read_cube_file('data/cubeset/multi_parameter', 'strong')
Example #17
def read_cube_file(dir_name,
                   scaling_type,
                   pbar=DUMMY_PROGRESS,
                   selected_metrics=None):
    # read the paths of the cube files in the given directory dir_name
    path = Path(dir_name)
    if not path.is_dir():
        raise FileFormatError(
            f'Cube file path must point to a directory: {dir_name}')
    cubex_files = list(path.glob('*/[!.]*.cubex'))
    if not cubex_files:
        raise FileFormatError(f'No cube files were found in: {dir_name}')
    pbar.total += len(cubex_files) + 6
    # iterate over all folders and read the cube profiles in them
    experiment = Experiment()

    pbar.step("Reading cube files")
    parameter_names_initial = []
    parameter_names = []
    parameter_values = []
    parameter_dict = defaultdict(set)
    progress_step_size = 5 / len(cubex_files)
    for path_id, path in enumerate(cubex_files):
        pbar.update(progress_step_size)
        folder_name = path.parent.name
        logging.debug(f"Cube file: {path} Folder: {folder_name}")

        # create the parameters
        par_start = folder_name.find(".") + 1
        par_end = folder_name.find(".r")
        par_end = None if par_end == -1 else par_end
        parameters = folder_name[par_start:par_end]
        # parameters = folder_name.split(".")

        # set scaling flag for experiment
        if path_id == 0:
            if scaling_type == "weak" or scaling_type == "strong":
                experiment.scaling = scaling_type

        param_list = re.split('([0-9.,]+)', parameters)
        param_list.remove("")

        parameter_names = [n for i, n in enumerate(param_list) if i % 2 == 0]
        parameter_value = [
            float(n.replace(',', '.').rstrip('.'))
            for i, n in enumerate(param_list) if i % 2 == 1
        ]

        # check if parameter already exists
        if path_id == 0:
            parameter_names_initial = parameter_names
        elif parameter_names != parameter_names_initial:
            raise FileFormatError(
                f"Parameters must be the same and in the same order: {parameter_names} is not {parameter_names_initial}."
            )

        for n, v in zip(parameter_names, parameter_value):
            parameter_dict[n].add(v)
        parameter_values.append(parameter_value)

    # determine non-constant parameters and add them to experiment
    parameter_selection_mask = []
    for i, p in enumerate(parameter_names):
        if len(parameter_dict[p]) > 1:
            experiment.add_parameter(Parameter(p))
            parameter_selection_mask.append(i)

    # check number of parameters, if > 1 use weak scaling instead
    # since summing values for strong scaling does not work for more than 1 parameter
    if scaling_type == 'strong' and len(experiment.parameters) > 1:
        warnings.warn(
            "Strong scaling only works for one parameter. Using weak scaling instead."
        )
        scaling_type = 'weak'
        experiment.scaling = scaling_type

    pbar.step("Reading cube files")

    show_warning_skipped_metrics = set()
    aggregated_values = defaultdict(list)

    # import data from cube files
    # optimize import memory usage by reordering files and grouping by coordinate
    num_points = 0
    reordered_files = sorted(zip(cubex_files, parameter_values),
                             key=itemgetter(1))
    for parameter_value, point_group in groupby(reordered_files,
                                                key=itemgetter(1)):
        num_points += 1
        # create coordinate
        coordinate = Coordinate(parameter_value[i]
                                for i in parameter_selection_mask)
        experiment.add_coordinate(coordinate)

        aggregated_values.clear()
        for path, _ in point_group:
            pbar.update()
            with CubexParser(str(path)) as parsed:
                callpaths = make_callpath_mapping(parsed.get_root_cnodes())
                # iterate over all metrics
                for cube_metric in parsed.get_metrics():
                    pbar.update(0)
                    # NOTE: here we could choose which metrics to extract
                    if selected_metrics and cube_metric.name not in selected_metrics:
                        continue
                    try:
                        metric_values = parsed.get_metric_values(
                            metric=cube_metric, cache=False)
                        # create the metrics
                        metric = Metric(cube_metric.name)

                        for cnode_id in metric_values.cnode_indices:
                            pbar.update(0)
                            cnode = parsed.get_cnode(cnode_id)
                            callpath = callpaths[cnode_id]
                            # NOTE: here we can use clustering algorithm to select only certain node level values
                            # create the measurements
                            cnode_values = metric_values.cnode_values(
                                cnode, convert_to_exclusive=True)

                            # in case of weak scaling calculate mean and median over all mpi process values
                            if scaling_type == "weak":
                                # do NOT use a generator; it is slower
                                aggregated_values[(callpath, metric)].extend(
                                    map(float, cnode_values))

                            # in case of strong scaling calculate the sum over all mpi process values
                            elif scaling_type == "strong":
                                aggregated_values[(callpath, metric)].append(
                                    float(sum(cnode_values)))

                    # Take care of missing metrics
                    except MissingMetricError as e:  # @UnusedVariable
                        show_warning_skipped_metrics.add(e.metric.name)
                        logging.info(
                            f'The cubex file {Path(*path.parts[-2:])} does not contain data for the metric "{e.metric.name}"'
                        )

        # add measurements to experiment
        for (callpath, metric), values in aggregated_values.items():
            pbar.update(0)
            experiment.add_measurement(
                Measurement(coordinate, callpath, metric, values))

    pbar.step("Unify calltrees")
    to_delete = []
    # determine common callpaths for common calltree
    # add common callpaths and metrics to experiment
    for key, value in pbar(experiment.measurements.items(),
                           len(experiment.measurements),
                           scale=0.1):
        if len(value) < num_points:
            to_delete.append(key)
        else:
            (callpath, metric) = key
            experiment.add_callpath(callpath)
            experiment.add_metric(metric)
    for key in to_delete:
        pbar.update(0)
        del experiment.measurements[key]

    # determine calltree
    call_tree = io_helper.create_call_tree(experiment.callpaths,
                                           pbar,
                                           progress_scale=0.1)
    experiment.call_tree = call_tree

    if show_warning_skipped_metrics:
        warnings.warn(
            "The following metrics were skipped because they contained no data: "
            f"{', '.join(show_warning_skipped_metrics)}. For more details see log."
        )

    io_helper.validate_experiment(experiment, pbar)
    pbar.update()
    return experiment
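
The folder-name parsing above (the substring between the first '.' and an optional '.r' repetition suffix, split into alternating name and number runs) implies a directory layout roughly like the following; the names and values are illustrative only:

data/cubeset/multi_parameter/
    experiment.x1y1z1.r1/profile.cubex
    experiment.x1y1z10.r1/profile.cubex
    experiment.x10y25z25.r2/profile.cubex

Here 'x10y25z25' yields parameters x, y, z with values 10.0, 25.0, 25.0, and folders that differ only in the '.r' suffix are grouped into one coordinate.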
Example #18
 def getParameter(self):
     p = Parameter(self.combo_box.currentText())
     p.id = self.combo_box.currentIndex()
     return p
Example #19
def deserialize_parameter(id_mappings, ioHelper):
    id = ioHelper.readId()
    paramName = ioHelper.readString()
    id_mappings.parameter_mapping[paramName] = id
    return Parameter(paramName)
Example #20
def read_text_file(path, progress_bar=DUMMY_PROGRESS):
    # read text file into list
    with open(path) as file:
        lines = file.readlines()

    # remove empty lines
    lines_no_space = [l for l in lines if not l.isspace()]

    # remove line breaks
    lines_no_space = [l.replace("\n", "") for l in lines_no_space]

    # create an experiment object to save the data loaded from the text file
    experiment = Experiment()

    # variables for parsing
    number_parameters = 0
    last_metric = None
    last_callpath = Callpath("")
    coordinate_id = 0

    if len(lines_no_space) == 0:
        raise FileFormatError(f'File contains no data: "{path}"')

    # parse text to extrap objects
    for i, line in enumerate(progress_bar(lines)):
        if line.isspace() or line.startswith('#'):
            continue  # allow comments
        line = re_whitespace.sub(' ', line)
        # get field name
        field_separator_idx = line.find(" ")
        field_name = line[:field_separator_idx]
        field_value = line[field_separator_idx + 1:].strip()

        if field_name == "METRIC":
            # create a new metric if not already exists
            metric_name = field_value
            test_metric = Metric(metric_name)
            if test_metric not in experiment.metrics:
                metric = test_metric
                experiment.add_metric(metric)
                last_metric = metric
            else:
                last_metric = test_metric
            # reset the coordinate id, since moving to a new metric
            coordinate_id = 0

        elif field_name == "REGION":
            # create a new region if not already exists
            callpath_name = field_value

            callpath = Callpath(callpath_name)
            experiment.add_callpath(callpath)
            last_callpath = callpath

            # reset the coordinate id, since moving to a new region
            coordinate_id = 0

        elif field_name == "DATA":
            if last_metric is None:
                last_metric = Metric("")
            # create a new data set
            data_string = field_value
            data_list = data_string.split(" ")
            values = [float(d) for d in data_list]
            if 1 <= number_parameters <= 4:
                # create one measurement per repetition

                if coordinate_id >= len(experiment.coordinates):
                    raise FileFormatError(
                        f'Too many DATA lines ({coordinate_id}) for the number of POINTS '
                        f'({len(experiment.coordinates)}) in line {i}.')
                measurement = Measurement(
                    experiment.coordinates[coordinate_id], last_callpath,
                    last_metric, values)
                experiment.add_measurement(measurement)
                coordinate_id += 1
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")

        elif field_name == "PARAMETER":
            # create a new parameter
            parameters = field_value.split(' ')
            experiment.parameters += [Parameter(p) for p in parameters]
            number_parameters = len(experiment.parameters)

        elif field_name == "POINTS":
            coordinate_string = field_value.strip()
            if '(' in coordinate_string:
                coordinate_string = coordinate_string.replace(") (", ")(")
                coordinate_string = coordinate_string[1:-1]
                coordinate_strings = coordinate_string.split(')(')
            else:
                coordinate_strings = coordinate_string.split(' ')
            # create a new point
            if number_parameters == 1:
                coordinates = [
                    Coordinate(float(c)) for c in coordinate_strings
                ]
                experiment.coordinates.extend(coordinates)
            elif 1 < number_parameters < 5:
                for coordinate_string in coordinate_strings:
                    coordinate_string = coordinate_string.strip()
                    values = coordinate_string.split(" ")
                    coordinate = Coordinate(float(v) for v in values)
                    experiment.coordinates.append(coordinate)
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")
        else:
            raise FileFormatError(
                f'Encountered wrong field: "{field_name}" in line {i}: {line}')

    if last_metric == Metric(''):
        experiment.metrics.append(last_metric)
    if last_callpath == Callpath(''):
        experiment.callpaths.append(last_callpath)
    # create the call tree and add it to the experiment
    call_tree = create_call_tree(experiment.callpaths,
                                 progress_bar,
                                 progress_scale=10)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
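
Taken together, the field handlers define a simple line format: PARAMETER declares the parameter names, POINTS lists the coordinates (parenthesized tuples when there is more than one parameter), and each DATA line holds the repetition values for the next coordinate of the current REGION/METRIC pair. A minimal, illustrative input:

PARAMETER p
POINTS 4 8 16
METRIC time
REGION sweep
DATA 1.0 1.1 0.9
DATA 2.1 2.0 2.2
DATA 4.0 3.9 4.1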
Example #21
def read_jsonlines_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to save the data loaded from the file
    experiment = Experiment()

    complete_data = {}
    parameters = None
    default_callpath = Callpath('<root>')
    default_metric = Metric('<default>')

    progress_bar.total += os.path.getsize(path)

    # read jsonlines file into complete_data
    with open(path) as file:
        progress_bar.step('Reading file')
        for ln, line in enumerate(file):
            progress_bar.update(len(line))
            if line.isspace():
                continue

            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error)}. Line: "{line}"'
                )
            try:
                if 'callpath' in data:
                    callpath = Callpath(data['callpath'])
                else:
                    callpath = default_callpath

                if 'metric' in data:
                    metric = Metric(data['metric'])
                else:
                    metric = default_metric
                key = callpath, metric
                if parameters is None:  # ensures uniform order of parameters
                    parameters = [Parameter(p) for p in data['params'].keys()]
                coordinate = Coordinate(data['params'][p.name]
                                        for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key,
                                                    coordinate, data['value'],
                                                    progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"'
                )

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment,
                                            progress_bar)

    for p in parameters:
        experiment.add_parameter(p)

    callpaths = experiment.callpaths
    experiment.call_tree = create_call_tree(callpaths, progress_bar)

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
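
Each input line is a standalone JSON object; 'callpath' and 'metric' are optional and fall back to '<root>' and '<default>'. Illustrative lines (values invented):

{"params": {"x": 1, "y": 10}, "value": 0.50}
{"params": {"x": 1, "y": 10}, "value": 0.52}
{"params": {"x": 2, "y": 10}, "callpath": "main", "metric": "time", "value": 0.91}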
Example #22
def _read_legacy_json_file(experiment, json_data, progress_bar):
    # read parameters
    parameter_data = json_data["parameters"]
    parameter_data = sorted(parameter_data, key=lambda x: x["id"])
    logging.debug("Number of parameters: " + str(len(parameter_data)))
    for i, p_data in enumerate(progress_bar(parameter_data)):
        parameter_name = p_data["name"]
        parameter = Parameter(parameter_name)
        experiment.add_parameter(parameter)
        logging.debug("Parameter " + str(i + 1) + ": " + parameter_name)
    # read callpaths
    callpath_data = json_data["callpaths"]
    callpath_data = sorted(callpath_data, key=lambda x: x["id"])
    logging.debug("Number of callpaths: " + str(len(callpath_data)))
    for i, c_data in enumerate(progress_bar(callpath_data)):
        callpath_name = c_data["name"]
        callpath = Callpath(callpath_name)
        experiment.add_callpath(callpath)
        logging.debug("Callpath " + str(i + 1) + ": " + callpath_name)
    # read metrics
    metric_data = json_data["metrics"]
    metric_data = sorted(metric_data, key=lambda x: x["id"])
    logging.debug("Number of metrics: " + str(len(metric_data)))
    for i, m_data in enumerate(progress_bar(metric_data)):
        metric_name = m_data["name"]
        metric = Metric(metric_name)
        experiment.add_metric(metric)
        logging.debug("Metric " + str(i + 1) + ": " + metric_name)
    # read coordinates
    coordinate_data = json_data["coordinates"]
    coordinate_data = sorted(coordinate_data, key=lambda x: x["id"])
    logging.debug("Number of coordinates: " + str(len(coordinate_data)))
    for i, c_data in enumerate(progress_bar(coordinate_data)):
        parameter_value_pairs = c_data["parameter_value_pairs"]
        parameter_value_pairs = sorted(parameter_value_pairs,
                                       key=lambda x: x["parameter_id"])
        coordinate = Coordinate(
            float(p["parameter_value"]) for p in parameter_value_pairs)
        experiment.add_coordinate(coordinate)
        logging.debug(f"Coordinate {i + 1}: {coordinate}")
    aggregate_data = {}
    # read measurements
    measurements_data = json_data["measurements"]
    logging.debug("Number of measurements: " + str(len(measurements_data)))
    for i, m_data in enumerate(progress_bar(measurements_data)):
        coordinate_id = int(m_data["coordinate_id"]) - 1
        callpath_id = int(m_data["callpath_id"]) - 1
        metric_id = int(m_data["metric_id"]) - 1
        value = float(m_data["value"])
        key = coordinate_id, callpath_id, metric_id
        if key in aggregate_data:
            aggregate_data[key].append(value)
        else:
            aggregate_data[key] = [value]
    for key in progress_bar(aggregate_data):
        coordinate_id, callpath_id, metric_id = key
        coordinate = experiment.coordinates[coordinate_id]
        callpath = experiment.callpaths[callpath_id]
        metric = experiment.metrics[metric_id]
        values = aggregate_data[key]
        measurement = Measurement(coordinate, callpath, metric, values)
        experiment.add_measurement(measurement)
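
For reference, a minimal, hypothetical document in the legacy layout this reader expects; all ids are 1-based in the file and shifted to 0-based on lookup, and repeated (coordinate, callpath, metric) keys are aggregated into one list of values:

{
    "parameters": [{"id": 1, "name": "x"}],
    "callpaths": [{"id": 1, "name": "sweep"}],
    "metrics": [{"id": 1, "name": "time"}],
    "coordinates": [
        {"id": 1, "parameter_value_pairs": [
            {"parameter_id": 1, "parameter_value": "4"}]}
    ],
    "measurements": [
        {"coordinate_id": "1", "callpath_id": "1", "metric_id": "1", "value": "1.0"},
        {"coordinate_id": "1", "callpath_id": "1", "metric_id": "1", "value": "1.1"}
    ]
}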