Example #1
    def test_general(self):
        experiment = read_text_file('data/text/one_parameter_6.txt')
        # initialize model generator
        model_generator = ModelGenerator(experiment, RefiningModeler())

        # create models from data
        model_generator.model_all()

        models = experiment.modelers[0].models
        cp0 = Callpath('met1'), Metric('')
        self.assertIsInstance(models[cp0].hypothesis, ConstantHypothesis)
        self.assertAlmostEqual(models[cp0].hypothesis.function.constant_coefficient, 4.068)

        cp1 = Callpath('met2'), Metric('')
        self.assertIsInstance(models[cp1].hypothesis, SingleParameterHypothesis)
        self.assertEqual(len(models[cp1].hypothesis.function.compound_terms), 1)
        self.assertEqual(len(models[cp1].hypothesis.function.compound_terms[0].simple_terms), 1)
        self.assertEqual(models[cp1].hypothesis.function.compound_terms[0].simple_terms[0].term_type, 'polynomial')
        self.assertAlmostEqual(models[cp1].hypothesis.function.compound_terms[0].simple_terms[0].exponent, 2.0)

        cp2 = Callpath('met3'), Metric('')
        self.assertIsInstance(models[cp2].hypothesis, SingleParameterHypothesis)
        self.assertEqual(len(models[cp2].hypothesis.function.compound_terms), 1)
        self.assertEqual(len(models[cp2].hypothesis.function.compound_terms[0].simple_terms), 1)
        self.assertEqual(models[cp2].hypothesis.function.compound_terms[0].simple_terms[0].term_type, 'polynomial')
        self.assertAlmostEqual(models[cp2].hypothesis.function.compound_terms[0].simple_terms[0].exponent, 2.0)

        cp3 = Callpath('met4'), Metric('')
        self.assertIsInstance(models[cp3].hypothesis, SingleParameterHypothesis)
        self.assertEqual(len(models[cp3].hypothesis.function.compound_terms), 1)
        self.assertEqual(len(models[cp3].hypothesis.function.compound_terms[0].simple_terms), 1)
        self.assertEqual(models[cp3].hypothesis.function.compound_terms[0].simple_terms[0].term_type, 'polynomial')
        self.assertAlmostEqual(models[cp3].hypothesis.function.compound_terms[0].simple_terms[0].exponent, 2.0)
Example #2
 def test_single_parameter(self):
     experiment = read_cube_file('data/cubeset/single_parameter', 'weak')
     self.assertListEqual([Parameter('x')], experiment.parameters)
     self.assertSetEqual(
         {
             Coordinate(1),
             Coordinate(10),
             Coordinate(25),
             Coordinate(50),
             Coordinate(100),
             Coordinate(250),
             Coordinate(500),
             Coordinate(1000),
             Coordinate(2000)
         }, set(experiment.coordinates))
     self.assertSetEqual(
         {
             Callpath('main'),
             Callpath('main->init_mat'),
             Callpath('main->zero_mat'),
             Callpath('main->mat_mul')
         }, set(experiment.callpaths))
     self.assertSetEqual(
         {
             Metric('visits'),
             Metric('time'),
             Metric('min_time'),
             Metric('max_time'),
             Metric('PAPI_FP_OPS'),
             Metric('PAPI_L3_TCM'),
             Metric('PAPI_L2_TCM')
         }, set(experiment.metrics))
     read_cube_file('data/cubeset/single_parameter', 'strong')
Example #3
def deserialize_callpath(id_mappings, ioHelper):
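    # read the serialized ids of this callpath, its region, and its parent from the helper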
    id = ioHelper.readId()
    region_id = ioHelper.readId()
    parent_id = ioHelper.readId()

    region_name = id_mappings.region_mapping[region_id]
    if parent_id != -1:
        parent = id_mappings.callpath_mapping[parent_id]
        callpath = Callpath(parent.name + '->' + region_name)
    else:
        callpath = Callpath(region_name)

    id_mappings.callpath_mapping[id] = callpath
    return callpath
Example #4
    def test_3parameters_reversed(self):
        experiment = read_text_file('data/text/three_parameter_1.txt')

        modeler = MultiParameterModeler()
        measurements = experiment.measurements[(Callpath('reg'),
                                                Metric('metr'))]
        measurements = list(reversed(measurements))

        f_msm = modeler.find_best_measurement_points(measurements)

        self.assertEqual(len(f_msm), 3)
        self.assertListEqual([m.coordinate for m in f_msm[0]], [
            Coordinate(60),
            Coordinate(50),
            Coordinate(40),
            Coordinate(30),
            Coordinate(20)
        ])
        self.assertListEqual([m.coordinate for m in f_msm[1]], [
            Coordinate(5),
            Coordinate(4),
            Coordinate(3),
            Coordinate(2),
            Coordinate(1)
        ])
        self.assertListEqual([m.coordinate for m in f_msm[2]], [
            Coordinate(500),
            Coordinate(400),
            Coordinate(300),
            Coordinate(200),
            Coordinate(100)
        ])
Example #5
    def test_3parameters_bands(self):
        experiment = read_jsonlines_file(
            'data/jsonlines/matrix_3p_bands.jsonl')

        modeler = MultiParameterModeler()
        measurements = experiment.measurements[(Callpath('<root>'),
                                                Metric('metr'))]

        f_msm = modeler.find_best_measurement_points(measurements)

        self.assertEqual(len(f_msm), 3)
        self.assertListEqual([m.coordinate for m in f_msm[0]], [
            Coordinate(1),
            Coordinate(2),
            Coordinate(3),
            Coordinate(4),
            Coordinate(5)
        ])
        self.assertListEqual([0] + [1] * 4, [m.mean for m in f_msm[0]])
        self.assertListEqual([m.coordinate for m in f_msm[1]], [
            Coordinate(1),
            Coordinate(2),
            Coordinate(3),
            Coordinate(4),
            Coordinate(5)
        ])
        self.assertListEqual([0.5] + [2.5] * 4, [m.mean for m in f_msm[1]])
        self.assertListEqual([m.coordinate for m in f_msm[2]], [
            Coordinate(1),
            Coordinate(2),
            Coordinate(3),
            Coordinate(4),
            Coordinate(5)
        ])
        self.assertListEqual([0] + [4] * 4, [m.mean for m in f_msm[2]])
Example #6
    def test_2parameters_random(self):
        experiment = read_text_file('data/text/two_parameter_1.txt')

        modeler = MultiParameterModeler()
        measurements = experiment.measurements[(Callpath('reg'),
                                                Metric('metr'))]
        for _ in range(len(measurements)):
            shuffle(measurements)

            f_msm = modeler.find_first_measurement_points(measurements)

            self.assertEqual(len(f_msm), 2)
            self.assertSetEqual(
                set(m.coordinate for m in f_msm[0]), {
                    Coordinate(20),
                    Coordinate(30),
                    Coordinate(40),
                    Coordinate(50),
                    Coordinate(60)
                })
            self.assertSetEqual(
                set(m.coordinate for m in f_msm[1]), {
                    Coordinate(1),
                    Coordinate(2),
                    Coordinate(3),
                    Coordinate(4),
                    Coordinate(5)
                })
Example #7
    def test_3parameters_sparse(self):
        experiment = read_jsonlines_file('data/jsonlines/matrix_3p.jsonl')

        modeler = MultiParameterModeler()
        measurements = experiment.measurements[(Callpath('<root>'), Metric('metr'))]
        for _ in range(len(measurements)):
            shuffle(measurements)

            f_msm = modeler.find_best_measurement_points(measurements)

            self.assertEqual(len(f_msm), 3)
            self.assertSetEqual(set(m.coordinate for m in f_msm[0]), {
                Coordinate(1),
                Coordinate(2),
                Coordinate(3),
                Coordinate(4),
                Coordinate(5)
            })
            self.assertListEqual([1] * 5, [m.mean for m in f_msm[0]])
            self.assertSetEqual(set(m.coordinate for m in f_msm[1]), {
                Coordinate(1),
                Coordinate(2),
                Coordinate(3),
                Coordinate(4),
                Coordinate(5)
            })
            self.assertListEqual([1] * 5, [m.mean for m in f_msm[1]])
            self.assertSetEqual(set(m.coordinate for m in f_msm[2]), {
                Coordinate(1),
                Coordinate(2),
                Coordinate(3),
                Coordinate(4),
                Coordinate(5)
            })
            self.assertListEqual([1] * 5, [m.mean for m in f_msm[2]])
Example #8
    def test_2parameters_reversed(self):
        experiment = read_text_file('data/text/two_parameter_1.txt')

        modeler = MultiParameterModeler()
        measurements = experiment.measurements[(Callpath('reg'),
                                                Metric('metr'))]
        measurements = list(reversed(measurements))

        f_msm = modeler.find_best_measurement_points(measurements)

        self.assertEqual(len(f_msm), 2)
        self.assertListEqual([m.coordinate for m in f_msm[0]], [
            Coordinate((60, )),
            Coordinate((50, )),
            Coordinate((40, )),
            Coordinate((30, )),
            Coordinate((20, ))
        ])
        self.assertListEqual([m.coordinate for m in f_msm[1]], [
            Coordinate((5, )),
            Coordinate((4, )),
            Coordinate((3, )),
            Coordinate((2, )),
            Coordinate((1, ))
        ])
Example #9
 def test_read_2(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_json_file("data/json/new/input2.json")
     x = Parameter('x')
     y = Parameter('y')
     self.assertListEqual(experiment.parameters, [x, y])
     self.assertListEqual(experiment.coordinates, [
         Coordinate(4, 10),
         Coordinate(8, 20),
         Coordinate(16, 30),
         Coordinate(32, 40),
         Coordinate(64, 50)
     ])
     self.assertListEqual(experiment.metrics,
                          [Metric('time'), Metric('visits')])
     self.assertListEqual(
         experiment.callpaths,
         [Callpath('sweep'), Callpath('sweep2')])
Example #10
 def test_read_1_json(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_json_file("data/jsonlines/test1.jsonl")
     self.assertListEqual([Parameter('x'), Parameter('y')],
                          experiment.parameters)
     self.assertListEqual([0, 1], [p.id for p in experiment.parameters])
     self.assertListEqual([
         Coordinate(x, y) for x in range(1, 5 + 1) for y in range(1, 5 + 1)
     ], experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #11
 def test_read_2(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_jsonlines_file("data/jsonlines/test2.jsonl")
     self.assertListEqual([Parameter('p'), Parameter('n')],
                          experiment.parameters)
     self.assertListEqual([0, 1], [p.id for p in experiment.parameters])
     self.assertListEqual([
         Coordinate(x, y) for x in [16, 32, 64, 128, 256]
         for y in [100, 200, 300, 400, 500]
     ], experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #12
def read_talpas_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to store the data loaded from the Talpas file
    experiment = Experiment()

    complete_data = {}
    parameters = None

    progress_bar.total += os.path.getsize(path)
    # read talpas file into complete_data
    with open(path) as file:

        progress_bar.step('Reading file')
        for ln, line in enumerate(file):
            progress_bar.update(len(line))
            if line.isspace():
                continue
            line = line.replace(';', ',')

            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error).replace(",", ";")}. Line: "{line}"'
                )
            try:
                key = Callpath(data['callpath']), Metric(data['metric'])
                if parameters is None:
                    parameters = [
                        Parameter(p) for p in data['parameters'].keys()
                    ]
                coordinate = Coordinate(data['parameters'][p.name]
                                        for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key,
                                                    coordinate, data['value'],
                                                    progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"'
                )

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment,
                                            progress_bar)

    for p in parameters:
        experiment.add_parameter(p)

    call_tree = create_call_tree(experiment.callpaths, progress_bar)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
Example #13
 def test_matrix3p(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_jsonlines_file("data/jsonlines/matrix_3p.jsonl")
     self.assertListEqual(
         [Parameter('x'), Parameter('y'),
          Parameter('z')], experiment.parameters)
     self.assertListEqual([0, 1, 2], [p.id for p in experiment.parameters])
     self.assertListEqual([Coordinate(x, 1, 1) for x in range(1, 5 + 1)] +
                          [Coordinate(1, x, 1) for x in range(2, 5 + 1)] +
                          [Coordinate(1, 1, x) for x in range(2, 5 + 1)],
                          experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #14
 def test_read_1(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_json_file("data/json/input_1.JSON")
     self.assertListEqual(experiment.parameters, [Parameter('x')])
     self.assertListEqual([p.id for p in experiment.parameters], [0])
     self.assertListEqual(experiment.coordinates, [
         Coordinate(4),
         Coordinate(8),
         Coordinate(16),
         Coordinate(32),
         Coordinate(64)
     ])
     self.assertListEqual(experiment.metrics, [Metric('time')])
     self.assertListEqual(experiment.callpaths, [Callpath('sweep')])
Example #15
def make_callpath_mapping(cnodes):
    callpaths = {}

    def walk_tree(parent_cnode, parent_name):
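        # each child's callpath extends the parent path with '->' followed by the child's region name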
        for cnode in parent_cnode.get_children():
            name = cnode.region.name
            path_name = '->'.join((parent_name, name))
            callpaths[cnode.id] = Callpath(path_name)
            walk_tree(cnode, path_name)

    for root_cnode in cnodes:
        name = root_cnode.region.name
        callpath = Callpath(name)
        callpaths[root_cnode.id] = callpath
        walk_tree(root_cnode, name)

    return callpaths
Example #16
def _read_new_json_file(experiment, json_data, progress_bar):
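    # register every parameter declared in the JSON document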
    parameter_data = json_data["parameters"]
    for p in parameter_data:
        parameter = Parameter(p)
        experiment.add_parameter(parameter)

    measurements_data = json_data["measurements"]
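    # measurements are grouped by callpath and then by metric; coordinates, callpaths,
    # and metrics are added to the experiment as they are encountered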
    for callpath_name, data in progress_bar(measurements_data.items()):
        for metric_name, measurements in data.items():
            for measurement in measurements:
                coordinate = Coordinate(measurement['point'])
                experiment.add_coordinate(coordinate)
                callpath = Callpath(callpath_name)
                experiment.add_callpath(callpath)
                metric = Metric(metric_name)
                experiment.add_metric(metric)
                measurement = Measurement(coordinate, callpath, metric,
                                          measurement['values'])
                experiment.add_measurement(measurement)
Example #17
    def test_3parameters_bands_incomplete(self):
        experiment = read_jsonlines_file('data/jsonlines/matrix_3p_bands_incomplete.jsonl')

        modeler = MultiParameterModeler()
        measurements = experiment.measurements[(Callpath('<root>'), Metric('metr'))]

        f_msm = modeler.find_best_measurement_points(measurements)

        self.assertEqual(len(f_msm), 3)
        self.assertListEqual([m.coordinate for m in f_msm[0]], [
            Coordinate(c) for c in [1, 3, 4, 5, 6]
        ])
        self.assertListEqual([0] + [1] * 4, [m.mean for m in f_msm[0]])
        self.assertListEqual([m.coordinate for m in f_msm[1]], [
            Coordinate(c) for c in range(1, 5 + 1)
        ])
        self.assertListEqual([0] + [2] * 4, [m.mean for m in f_msm[1]])
        self.assertListEqual([m.coordinate for m in f_msm[2]], [
            Coordinate(c) for c in range(1, 5 + 1)
        ])
        self.assertListEqual([0] + [4] * 4, [m.mean for m in f_msm[2]])

        measurements.reverse()

        f_msm = modeler.find_best_measurement_points(measurements)

        self.assertEqual(len(f_msm), 3)
        self.assertListEqual([m.coordinate for m in f_msm[0]], [
            Coordinate(c) for c in reversed([1, 3, 4, 5, 6])
        ])
        self.assertListEqual([1] * 4 + [0], [m.mean for m in f_msm[0]])
        self.assertListEqual([m.coordinate for m in f_msm[1]], [
            Coordinate(c) for c in [6, 5, 4, 3, 2]
        ])
        self.assertListEqual([3] * 5, [m.mean for m in f_msm[1]])
        self.assertListEqual([m.coordinate for m in f_msm[2]], [
            Coordinate(c) for c in reversed(range(1, 5 + 1))
        ])
        self.assertListEqual([4] * 4 + [0], [m.mean for m in f_msm[2]])
Example #18
 def test_sparse_experiment(self):
     experiment = read_extrap3_experiment('data/input/experiment_3_sparse')
     self.assertListEqual([Parameter('x'), Parameter('y'), Parameter('z')], experiment.parameters)
     self.assertSetEqual({Coordinate(1, 1, 1), Coordinate(1, 1, 10), Coordinate(1, 1, 25),
                          Coordinate(1, 10, 1), Coordinate(1, 10, 10), Coordinate(1, 10, 25),
                          Coordinate(1, 25, 1), Coordinate(1, 25, 10), Coordinate(1, 25, 25),
                          Coordinate(10, 1, 1), Coordinate(10, 1, 10), Coordinate(10, 1, 25),
                          Coordinate(10, 10, 1), Coordinate(10, 10, 10), Coordinate(10, 10, 25),
                          Coordinate(10, 25, 1), Coordinate(10, 25, 10), Coordinate(10, 25, 25),
                          Coordinate(25, 1, 1), Coordinate(25, 1, 10), Coordinate(25, 1, 25),
                          Coordinate(25, 10, 1), Coordinate(25, 10, 10), Coordinate(25, 10, 25),
                          Coordinate(25, 25, 1), Coordinate(25, 25, 10), Coordinate(25, 25, 25)
                          }, set(experiment.coordinates))
     self.assertSetEqual({Callpath('main'), Callpath('main->init_mat'), Callpath('main->zero_mat'),
                          Callpath('main->mat_mul')}, set(experiment.callpaths))
     call_tree = CallTree()
     main = Node('main', Callpath('main'))
     call_tree.add_child_node(main)
     init_mat = Node('init_mat', Callpath('main->init_mat'))
     main.add_child_node(init_mat)
     zero_mat = Node('zero_mat', Callpath('main->zero_mat'))
     main.add_child_node(zero_mat)
     mat_mul = Node('mat_mul', Callpath('main->mat_mul'))
      main.add_child_node(mat_mul)
      self.assertEqual(call_tree, experiment.call_tree)
Example #19
 def test_sparse_matrix2p(self):
     Parameter.ID_COUNTER = itertools.count()
     experiment = read_jsonlines_file(
         "data/jsonlines/sparse_matrix_2p.jsonl")
     self.assertListEqual([Parameter('x'), Parameter('y')],
                          experiment.parameters)
     self.assertListEqual([0, 1], [p.id for p in experiment.parameters])
     self.assertListEqual([
         Coordinate(20, 1),
         Coordinate(30, 1),
         Coordinate(30, 2),
         Coordinate(40, 1),
         Coordinate(40, 2),
         Coordinate(40, 3),
         Coordinate(50, 1),
         Coordinate(50, 2),
         Coordinate(50, 3),
         Coordinate(50, 4),
         Coordinate(60, 1),
         Coordinate(60, 2),
         Coordinate(60, 3),
         Coordinate(60, 4),
         Coordinate(60, 5),
         Coordinate(70, 2),
         Coordinate(70, 3),
         Coordinate(70, 4),
         Coordinate(70, 5),
         Coordinate(80, 3),
         Coordinate(80, 4),
         Coordinate(80, 5),
         Coordinate(90, 4),
         Coordinate(90, 5),
         Coordinate(100, 5)
     ], experiment.coordinates)
     self.assertListEqual([Metric('metr')], experiment.metrics)
     self.assertListEqual([Callpath('<root>')], experiment.callpaths)
Example #20
def main():
    experiment = text_file_reader.read_text_file(
        "tests/data/text/two_parameter_1.txt")
    modeller = GPUDirectMultiParameterModeler()
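    # model the measurements of callpath 'reg' and metric 'metr' with the GPU-based multi-parameter modeler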
    model = modeller.create_model(experiment.measurements[(Callpath('reg'),
                                                           Metric('metr'))])
Example #21
 def test_multi_parameter(self):
     experiment = read_cube_file('data/cubeset/multi_parameter', 'weak')
     self.assertListEqual(
         [Parameter('x'), Parameter('y'),
          Parameter('z')], experiment.parameters)
     self.assertSetEqual(
         {
             Coordinate(1, 1, 1),
             Coordinate(1, 1, 10),
             Coordinate(1, 1, 25),
             Coordinate(1, 10, 1),
             Coordinate(1, 10, 10),
             Coordinate(1, 10, 25),
             Coordinate(1, 25, 1),
             Coordinate(1, 25, 10),
             Coordinate(1, 25, 25),
             Coordinate(10, 1, 1),
             Coordinate(10, 1, 10),
             Coordinate(10, 1, 25),
             Coordinate(10, 10, 1),
             Coordinate(10, 10, 10),
             Coordinate(10, 10, 25),
             Coordinate(10, 25, 1),
             Coordinate(10, 25, 10),
             Coordinate(10, 25, 25),
             Coordinate(25, 1, 1),
             Coordinate(25, 1, 10),
             Coordinate(25, 1, 25),
             Coordinate(25, 10, 1),
             Coordinate(25, 10, 10),
             Coordinate(25, 10, 25),
             Coordinate(25, 25, 1),
             Coordinate(25, 25, 10),
             Coordinate(25, 25, 25)
         }, set(experiment.coordinates))
     self.assertSetEqual(
         {
             Callpath('main'),
             Callpath('main->init_mat'),
             Callpath('main->zero_mat'),
             Callpath('main->mat_mul')
         }, set(experiment.callpaths))
     call_tree = CallTree()
     main = Node('main', Callpath('main'))
     call_tree.add_child_node(main)
     init_mat = Node('init_mat', Callpath('main->init_mat'))
     main.add_child_node(init_mat)
     zero_mat = Node('zero_mat', Callpath('main->zero_mat'))
     main.add_child_node(zero_mat)
     mat_mul = Node('mat_mul', Callpath('main->mat_mul'))
     main.add_child_node(mat_mul)
     self.assertEqual(call_tree, experiment.call_tree)
     self.assertSetEqual(
         {
             Metric('visits'),
             Metric('time'),
             Metric('min_time'),
             Metric('max_time'),
             Metric('PAPI_FP_OPS'),
             Metric('PAPI_L3_TCM'),
             Metric('PAPI_L2_TCM')
         }, set(experiment.metrics))
     read_cube_file('data/cubeset/multi_parameter', 'strong')
Example #22
 def walk_tree(parent_cnode, parent_name):
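     # extend the parent's path name with '->' and the child's region name, then recurse into the children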
     for cnode in parent_cnode.get_children():
         name = cnode.region.name
         path_name = '->'.join((parent_name, name))
         callpaths[cnode.id] = Callpath(path_name)
         walk_tree(cnode, path_name)
Example #23
def _read_legacy_json_file(experiment, json_data, progress_bar):
    # read parameters
    parameter_data = json_data["parameters"]
    parameter_data = sorted(parameter_data, key=lambda x: x["id"])
    logging.debug("Number of parameters: " + str(len(parameter_data)))
    for i, p_data in enumerate(progress_bar(parameter_data)):
        parameter_name = p_data["name"]
        parameter = Parameter(parameter_name)
        experiment.add_parameter(parameter)
        logging.debug("Parameter " + str(i + 1) + ": " + parameter_name)
    # read callpaths
    callpath_data = json_data["callpaths"]
    callpath_data = sorted(callpath_data, key=lambda x: x["id"])
    logging.debug("Number of callpaths: " + str(len(callpath_data)))
    for i, c_data in enumerate(progress_bar(callpath_data)):
        callpath_name = c_data["name"]
        callpath = Callpath(callpath_name)
        experiment.add_callpath(callpath)
        logging.debug("Callpath " + str(i + 1) + ": " + callpath_name)
    # read metrics
    metric_data = json_data["metrics"]
    metric_data = sorted(metric_data, key=lambda x: x["id"])
    logging.debug("Number of metrics: " + str(len(metric_data)))
    for i, m_data in enumerate(progress_bar(metric_data)):
        metric_name = m_data["name"]
        metric = Metric(metric_name)
        experiment.add_metric(metric)
        logging.debug("Metric " + str(i + 1) + ": " + metric_name)
    # read coordinates
    coordinate_data = json_data["coordinates"]
    coordinate_data = sorted(coordinate_data, key=lambda x: x["id"])
    logging.debug("Number of coordinates: " + str(len(coordinate_data)))
    for i, c_data in enumerate(progress_bar(coordinate_data)):
        parameter_value_pairs = c_data["parameter_value_pairs"]
        parameter_value_pairs = sorted(parameter_value_pairs,
                                       key=lambda x: x["parameter_id"])
        coordinate = Coordinate(
            float(p["parameter_value"]) for p in parameter_value_pairs)
        experiment.add_coordinate(coordinate)
        logging.debug(f"Coordinate {i + 1}: {coordinate}")
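    # group repeated measurement values by (coordinate, callpath, metric)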
    aggregate_data = {}
    # read measurements
    measurements_data = json_data["measurements"]
    logging.debug("Number of measurements: " + str(len(measurements_data)))
    for i, m_data in enumerate(progress_bar(measurements_data)):
        coordinate_id = int(m_data["coordinate_id"]) - 1
        callpath_id = int(m_data["callpath_id"]) - 1
        metric_id = int(m_data["metric_id"]) - 1
        value = float(m_data["value"])
        key = coordinate_id, callpath_id, metric_id
        if key in aggregate_data:
            aggregate_data[key].append(value)
        else:
            aggregate_data[key] = [value]
    for key in progress_bar(aggregate_data):
        coordinate_id, callpath_id, metric_id = key
        coordinate = experiment.coordinates[coordinate_id]
        callpath = experiment.callpaths[callpath_id]
        metric = experiment.metrics[metric_id]
        values = aggregate_data[key]
        measurement = Measurement(coordinate, callpath, metric, values)
        experiment.add_measurement(measurement)
Example #24
def read_jsonlines_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to store the data loaded from the JSON Lines file
    experiment = Experiment()

    complete_data = {}
    parameters = None
    default_callpath = Callpath('<root>')
    default_metric = Metric('<default>')

    progress_bar.total += os.path.getsize(path)

    # read jsonlines file into complete_data
    with open(path) as file:
        progress_bar.step('Reading file')
        for ln, line in enumerate(file):
            progress_bar.update(len(line))
            if line.isspace():
                continue

            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error)}. Line: "{line}"'
                )
            try:
                if 'callpath' in data:
                    callpath = Callpath(data['callpath'])
                else:
                    callpath = default_callpath

                if 'metric' in data:
                    metric = Metric(data['metric'])
                else:
                    metric = default_metric
                key = callpath, metric
                if parameters is None:  # ensures uniform order of parameters
                    parameters = [Parameter(p) for p in data['params'].keys()]
                coordinate = Coordinate(data['params'][p.name]
                                        for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key,
                                                    coordinate, data['value'],
                                                    progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"'
                )

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment,
                                            progress_bar)

    for p in parameters:
        experiment.add_parameter(p)

    callpaths = experiment.callpaths
    experiment.call_tree = create_call_tree(callpaths, progress_bar)

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
Example #25
def read_cube_file(dir_name, scaling_type, pbar=DUMMY_PROGRESS, selected_metrics=None):
    # read the paths of the cube files in the given directory with dir_name
    path = Path(dir_name)
    if not path.is_dir():
        raise FileFormatError(f'Cube file path must point to a directory: {dir_name}')
    cubex_files = list(path.glob('*/[!.]*.cubex'))
    if not cubex_files:
        raise FileFormatError(f'No cube files were found in: {dir_name}')
    pbar.total += len(cubex_files) + 6
    # iterate over all folders and read the cube profiles in them
    experiment = Experiment()

    pbar.step("Reading cube files")
    parameter_names_initial = []
    parameter_names = []
    parameter_values = []
    parameter_dict = defaultdict(set)
    progress_step_size = 5 / len(cubex_files)
    for path_id, path in enumerate(cubex_files):
        pbar.update(progress_step_size)
        folder_name = path.parent.name
        logging.debug(f"Cube file: {path} Folder: {folder_name}")

        # create the parameters
        par_start = folder_name.find(".") + 1
        par_end = folder_name.find(".r")
        par_end = None if par_end == -1 else par_end
        parameters = folder_name[par_start:par_end]
        # parameters = folder_name.split(".")

        # set scaling flag for experiment
        if path_id == 0:
            if scaling_type == "weak" or scaling_type == "strong":
                experiment.scaling = scaling_type

        param_list = re.split('([0-9.,]+)', parameters)
        param_list.remove("")

        parameter_names = [n for i, n in enumerate(param_list) if i % 2 == 0]
        parameter_value = [float(n.replace(',', '.').rstrip('.')) for i, n in enumerate(param_list) if i % 2 == 1]

        # check if parameter already exists
        if path_id == 0:
            parameter_names_initial = parameter_names
        elif parameter_names != parameter_names_initial:
            raise FileFormatError(
                f"Parameters must be the same and in the same order: {parameter_names} is not {parameter_names_initial}.")

        for n, v in zip(parameter_names, parameter_value):
            parameter_dict[n].add(v)
        parameter_values.append(parameter_value)

    # determine non-constant parameters and add them to experiment
    parameter_selection_mask = []
    for i, p in enumerate(parameter_names):
        if len(parameter_dict[p]) > 1:
            experiment.add_parameter(Parameter(p))
            parameter_selection_mask.append(i)

    # check the number of parameters; if there is more than one, use weak scaling instead,
    # since summing values for strong scaling does not work for more than one parameter
    if scaling_type == 'strong' and len(experiment.parameters) > 1:
        warnings.warn("Strong scaling only works for one parameter. Using weak scaling instead.")
        scaling_type = 'weak'
        experiment.scaling = scaling_type

    pbar.step("Reading cube files")

    show_warning_skipped_metrics = set()
    aggregated_values = defaultdict(list)

    # import data from cube files
    # optimize import memory usage by reordering files and grouping by coordinate
    num_points = 0
    reordered_files = sorted(zip(cubex_files, parameter_values), key=itemgetter(1))
    for parameter_value, point_group in groupby(reordered_files, key=itemgetter(1)):
        num_points += 1
        # create coordinate
        coordinate = Coordinate(parameter_value[i] for i in parameter_selection_mask)
        experiment.add_coordinate(coordinate)

        aggregated_values.clear()
        for path, _ in point_group:
            pbar.update()
            with CubexParser(str(path)) as parsed:
                callpaths = make_callpath_mapping(parsed.get_root_cnodes())
                # iterate over all metrics
                for cube_metric in parsed.get_metrics():
                    pbar.update(0)
                    # NOTE: here we could choose which metrics to extract
                    if selected_metrics and cube_metric.name not in selected_metrics:
                        continue
                    try:
                        metric_values = parsed.get_metric_values(metric=cube_metric, cache=False)
                        # create the metrics
                        metric = Metric(cube_metric.name)

                        for cnode_id in metric_values.cnode_indices:
                            pbar.update(0)
                            cnode = parsed.get_cnode(cnode_id)
                            callpath = callpaths[cnode_id]
                            # NOTE: here we can use clustering algorithm to select only certain node level values
                            # create the measurements
                            cnode_values = metric_values.cnode_values(cnode, convert_to_exclusive=True)

                            # in case of weak scaling calculate mean and median over all mpi process values
                            if scaling_type == "weak":
                                # do NOT use a generator, it is slower
                                aggregated_values[(callpath, metric)].extend(map(float, cnode_values))

                            # in case of strong scaling calculate the sum over all mpi process values
                            elif scaling_type == "strong":
                                aggregated_values[(callpath, metric)].append(float(sum(cnode_values)))

                    # Take care of missing metrics
                    except MissingMetricError as e:
                        show_warning_skipped_metrics.add(e.metric.name)
                        logging.info(
                            f'The cubex file {Path(*path.parts[-2:])} does not contain data for the metric "{e.metric.name}"')

        # add measurements to experiment
        for (callpath, metric), values in aggregated_values.items():
            pbar.update(0)
            experiment.add_measurement(Measurement(coordinate, callpath, metric, values))

    pbar.step("Unify calltrees")
    callpaths_to_merge = []
    # determine common callpaths for common calltree
    # add common callpaths and metrics to experiment
    for key, value in pbar(experiment.measurements.items(), len(experiment.measurements), scale=0.1):
        if len(value) < num_points:
            callpaths_to_merge.append(key)
            pbar.total += 0.1
        else:
            (callpath, metric) = key
            experiment.add_callpath(callpath)
            experiment.add_metric(metric)
    for key in callpaths_to_merge:
        (callpath, metric) = key
        new_callpath: Callpath = callpath
        new_key = key
        # find parent call-path
        while new_key not in experiment.measurements and '->' in new_callpath.name:
            new_callpath = Callpath(str(new_callpath).rsplit(sep='->', maxsplit=1)[0])
            new_key = (new_callpath, metric)
        # merge parent measurements with the current measurements
        if new_key in experiment.measurements:
            measurements: Dict[Coordinate, Measurement] = {m.coordinate: m for m in experiment.measurements[new_key]}
            for m in experiment.measurements[key]:
                new_m = measurements.get(m.coordinate)
                if new_m:
                    new_m.merge(m)
                else:
                    m.callpath = experiment.measurements[new_key][0].callpath
                    experiment.measurements[new_key].append(m)
        else:
            warnings.warn("Some call paths could not be integrated into the common call tree.")
        pbar.update(0.1)
        # delete current measurements
        del experiment.measurements[key]

    # determine calltree
    call_tree = io_helper.create_call_tree(experiment.callpaths, pbar, progress_scale=0.1)
    experiment.call_tree = call_tree

    if show_warning_skipped_metrics:
        warnings.warn("The following metrics were skipped because they contained no data: "
                      f"{', '.join(show_warning_skipped_metrics)}. For more details see log.")

    io_helper.validate_experiment(experiment, pbar)
    pbar.update()
    return experiment
Example #26
def read_text_file(path, progress_bar=DUMMY_PROGRESS):
    # read text file into list
    with open(path) as file:
        lines = file.readlines()

    # remove empty lines
    lines_no_space = [l for l in lines if not l.isspace()]

    # remove line breaks
    lines_no_space = [l.replace("\n", "") for l in lines_no_space]

    # create an experiment object to store the data loaded from the text file
    experiment = Experiment()

    # variables for parsing
    number_parameters = 0
    last_metric = None
    last_callpath = Callpath("")
    coordinate_id = 0

    if len(lines_no_space) == 0:
        raise FileFormatError(f'File contains no data: "{path}"')

    # parse text to extrap objects
    for i, line in enumerate(progress_bar(lines)):
        if line.isspace() or line.startswith('#'):
            continue  # allow comments
        line = re_whitespace.sub(' ', line)
        # get field name
        field_separator_idx = line.find(" ")
        field_name = line[:field_separator_idx]
        field_value = line[field_separator_idx + 1:].strip()

        if field_name == "METRIC":
            # create a new metric if it does not already exist
            metric_name = field_value
            test_metric = Metric(metric_name)
            if test_metric not in experiment.metrics:
                metric = test_metric
                experiment.add_metric(metric)
                last_metric = metric
            else:
                last_metric = test_metric
            # reset the coordinate id, since moving to a new metric
            coordinate_id = 0

        elif field_name == "REGION":
            # create a callpath for the region
            callpath_name = field_value

            callpath = Callpath(callpath_name)
            experiment.add_callpath(callpath)
            last_callpath = callpath

            # reset the coordinate id, since moving to a new region
            coordinate_id = 0

        elif field_name == "DATA":
            if last_metric is None:
                last_metric = Metric("")
            # create a new data set
            data_string = field_value
            data_list = data_string.split(" ")
            values = [float(d) for d in data_list]
            if 1 <= number_parameters <= 4:
                # create one measurement per repetition

                if coordinate_id >= len(experiment.coordinates):
                    raise FileFormatError(
                        f'Too many DATA lines ({coordinate_id}) for the number of POINTS '
                        f'({len(experiment.coordinates)}) in line {i}.')
                measurement = Measurement(
                    experiment.coordinates[coordinate_id], last_callpath,
                    last_metric, values)
                experiment.add_measurement(measurement)
                coordinate_id += 1
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")

        elif field_name == "PARAMETER":
            # create a new parameter
            parameters = field_value.split(' ')
            experiment.parameters += [Parameter(p) for p in parameters]
            number_parameters = len(experiment.parameters)

        elif field_name == "POINTS":
            coordinate_string = field_value.strip()
            if '(' in coordinate_string:
                coordinate_string = coordinate_string.replace(") (", ")(")
                coordinate_string = coordinate_string[1:-1]
                coordinate_strings = coordinate_string.split(')(')
            else:
                coordinate_strings = coordinate_string.split(' ')
            # create a new point
            if number_parameters == 1:
                coordinates = [
                    Coordinate(float(c)) for c in coordinate_strings
                ]
                experiment.coordinates.extend(coordinates)
            elif 1 < number_parameters < 5:
                for coordinate_string in coordinate_strings:
                    coordinate_string = coordinate_string.strip()
                    values = coordinate_string.split(" ")
                    coordinate = Coordinate(float(v) for v in values)
                    experiment.coordinates.append(coordinate)
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")
        else:
            raise FileFormatError(
                f'Encountered unknown field: "{field_name}" in line {i}: {line}')

    if last_metric == Metric(''):
        experiment.metrics.append(last_metric)
    if last_callpath == Callpath(''):
        experiment.callpaths.append(last_callpath)
    # create the call tree and add it to the experiment
    call_tree = create_call_tree(experiment.callpaths,
                                 progress_bar,
                                 progress_scale=10)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment