Example No. 1
def test_gaussian_model_predict(self):
    training_data = TrainingData()
    config_set = UniversalConfigSet(4, 26544)
    model = GaussianModel(config_set, training_data)
    training_sample_1 = {
        "spark.executor.memory": 11945,
        "spark.sql.shuffle.partitions": 200,
        "spark.executor.cores": 2,
        "spark.driver.memory": 1024 * 4,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    training_sample_2 = {
        "spark.executor.memory": 5972,
        "spark.sql.shuffle.partitions": 300,
        "spark.executor.cores": 1,
        "spark.driver.memory": 1024 * 2,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    training_sample_3 = {
        "spark.executor.memory": 11945,
        "spark.sql.shuffle.partitions": 460,
        "spark.executor.cores": 2,
        "spark.driver.memory": 1024 * 4,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    training_sample_4 = {
        "spark.executor.memory": 10068,
        "spark.sql.shuffle.partitions": 1660,
        "spark.executor.cores": 1,
        "spark.driver.memory": 1024,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    # Train on four (config, output) samples.
    model.add_sample_to_train_data(training_sample_1, 131)
    model.add_sample_to_train_data(training_sample_2, 143)
    model.add_sample_to_train_data(training_sample_3, 155)
    model.add_sample_to_train_data(training_sample_4, 343)
    model.train()
    # Rebuild training_sample_4 as a Config object and predict for it.
    config = Config()
    params = config_set.get_params()
    for param in params:
        if param.get_name() == 'spark.executor.memory':
            config.add_param(param, 10068)
        elif param.get_name() == 'spark.sql.shuffle.partitions':
            config.add_param(param, 1660)
        elif param.get_name() == 'spark.executor.cores':
            config.add_param(param, 1)
        elif param.get_name() == 'spark.driver.memory':
            config.add_param(param, 1024)
        elif param.get_name() == 'spark.sql.autoBroadcastJoinThreshold':
            config.add_param(param, 10)
        elif param.get_name() == 'spark.sql.statistics.fallBackToHdfs':
            config.add_param(param, 0)
    low, high = model.predict(
        model.normalizer.normalize_config(config.get_all_param_values()))
    # For a configuration seen in training, the predicted interval should
    # be tight around the observed output (343).
    assert low > (343 - 1)
    assert high < (343 + 1)
Example No. 2
def __init__(self, num_cores, total_memory, training_data=None):
    # Use None instead of a mutable default: a TrainingData() default
    # argument would be created once and shared across instances.
    self.training_data = training_data if training_data is not None else TrainingData()
    self.num_cores = num_cores
    self.total_memory = total_memory
    self.config_set = UniversalConfigSet(num_cores, total_memory)
    if self.training_data.size() > 0:
        self.ml_model = GaussianModel(self.config_set, self.training_data)
        self.math_model = AbstractModel(self.config_set, self.training_data, num_cores, total_memory)
Example No. 3
def test_denormalization():
    config_set = UniversalConfigSet(4, 28 * 1024)
    normalizer = ConfigNormalizer(config_set)
    norm_configs = normalizer.get_all_possible_normalized_configs()
    denorm_config = normalizer.denormalize_config(norm_configs)
    # Denormalizing every normalized grid should recover each parameter's
    # full domain of possible values.
    for i, param in enumerate(config_set.get_params()):
        domain = param.get_domain()
        assert sorted(domain.get_possible_values()) == sorted(denorm_config[i])
Example No. 4
class Combiner:
    def __init__(self, num_cores, total_memory, training_data=None):
        # Use None instead of a mutable default: a TrainingData() default
        # argument would be created once and shared across instances.
        self.training_data = training_data if training_data is not None else TrainingData()
        self.num_cores = num_cores
        self.total_memory = total_memory
        self.config_set = UniversalConfigSet(num_cores, total_memory)
        if self.training_data.size() > 0:
            self.ml_model = GaussianModel(self.config_set, self.training_data)
            self.math_model = AbstractModel(self.config_set, self.training_data, num_cores, total_memory)

    def add_training_data(self, training_sample, output):
        # Rebuild both models whenever a new (sample, output) pair arrives.
        self.training_data.add_training_data(self._get_training_config(training_sample), output)
        self.ml_model = GaussianModel(self.config_set, self.training_data)
        self.math_model = AbstractModel(self.config_set, self.training_data, self.num_cores, self.total_memory)

    def _get_training_config(self, training_sample):
        # Map parameter names to Param objects so a raw dict of settings
        # can be turned into a Config instance.
        conf_names_params_mapping = {}
        for param in self.config_set.get_params():
            conf_names_params_mapping[param.get_name()] = param
        training_config = Config()
        for config_name, config_value in training_sample.items():
            training_config.add_param(conf_names_params_mapping[config_name], config_value)
        return training_config

    def get_best_config(self):
        if self.training_data.size() == 0:
            raise ValueError("Training Data Not Provided")
        if self.training_data.size() == 1:
            # Too little data for the ML model; fall back to the math model.
            return self.math_model.get_best_config()
        self.ml_model.train()
        sampled_configs = self.ml_model.get_sampled_configs()
        pruned_configs = self.math_model.get_pruned_config(sampled_configs)
        return self.ml_model.get_best_config_for_config_space(pruned_configs)
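
A minimal usage sketch for Combiner (the parameter names mirror the training samples in the tests above; constructing it empty and feeding samples one at a time is an assumption about the intended workflow, not taken from the repository):

combiner = Combiner(num_cores=4, total_memory=26544)
combiner.add_training_data({
    "spark.executor.memory": 11945,
    "spark.sql.shuffle.partitions": 200,
    "spark.executor.cores": 2,
    "spark.driver.memory": 1024 * 4,
    "spark.sql.autoBroadcastJoinThreshold": 10,
    "spark.sql.statistics.fallBackToHdfs": 0
}, 131)
best_config = combiner.get_best_config()  # with one sample, falls back to the math model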
Example No. 5
def test_discretizer_for_r4_xlarge():
    config_set = UniversalConfigSet(4, 26544)
    normalizer = ConfigNormalizer(config_set)
    norm_configs = normalizer.get_all_possible_normalized_configs()
    sampler = LhsDiscreteSampler(norm_configs)
    samples = sampler.get_samples(2)
    # The sampler returns one sample per value in the largest parameter
    # grid, and each sample assigns one value to every parameter.
    assert max(len(grid) for grid in norm_configs) == len(samples)
    assert all(len(sample) == len(norm_configs) for sample in samples)
Example No. 6
def test_normalization():
    config_set = UniversalConfigSet(4, 28 * 1024)
    normalizer = ConfigNormalizer(config_set)
    norm_configs = normalizer.get_all_possible_normalized_configs()
    assert len(norm_configs) == 6
    assert len(norm_configs[0]) == 200
    assert len(norm_configs[1]) == 49
    assert len(norm_configs[2]) == 22
    assert len(norm_configs[3]) == 4
    assert len(norm_configs[4]) == 19
    assert len(norm_configs[5]) == 2
Example No. 7
def test_normalization():
    config_set = UniversalConfigSet(4, 28 * 1024)
    normalizer = ConfigNormalizer(config_set)
    norm_configs = normalizer.get_all_possible_normalized_configs()
    expected_config = [
        [
            1.0, 0.9743589743589743, 0.9487179487179487, 0.9230769230769231,
            0.8974358974358975, 0.8717948717948718, 0.8461538461538461,
            0.8205128205128205, 0.7948717948717948, 0.7692307692307693,
            0.7435897435897436, 0.717948717948718, 0.6923076923076923,
            0.6666666666666666, 0.6410256410256411, 0.6153846153846154,
            0.5897435897435898, 0.5641025641025641, 0.5384615384615384,
            0.5128205128205128, 0.48717948717948717, 0.46153846153846156,
            0.4358974358974359, 0.41025641025641024, 0.38461538461538464,
            0.358974358974359, 0.3333333333333333, 0.3076923076923077,
            0.28205128205128205, 0.2564102564102564, 0.23076923076923078,
            0.20512820512820512, 0.1794871794871795, 0.15384615384615385,
            0.1282051282051282, 0.10256410256410256, 0.07692307692307693,
            0.05128205128205128, 0.02564102564102564, -0.0
        ],
        [
            1.0, 0.972972972972973, 0.945945945945946, 0.918918918918919,
            0.8918918918918919, 0.8648648648648649, 0.8378378378378379,
            0.8108108108108109, 0.7837837837837838, 0.7567567567567568,
            0.7297297297297298, 0.7027027027027027, 0.6756756756756757,
            0.6486486486486487, 0.6216216216216217, 0.5945945945945946,
            0.5675675675675675, 0.5405405405405406, 0.5135135135135136,
            0.4864864864864865, 0.4594594594594595, 0.43243243243243246,
            0.40540540540540543, 0.3783783783783784, 0.35135135135135137,
            0.32432432432432434, 0.2972972972972973, 0.2702702702702703,
            0.24324324324324326, 0.21621621621621623, 0.1891891891891892,
            0.16216216216216217, 0.13513513513513514, 0.10810810810810811,
            0.08108108108108109, 0.05405405405405406, 0.02702702702702703, -0.0
        ],
        [
            1.0, 0.9629629629629629, 0.9259259259259258, 0.8888888888888888,
            0.8518518518518519, 0.8148148148148148, 0.7777777777777777,
            0.7407407407407407, 0.7037037037037037, 0.6666666666666666,
            0.6296296296296295, 0.5925925925925926, 0.5555555555555556,
            0.5185185185185185, 0.48148148148148145, 0.4444444444444444,
            0.4074074074074074, 0.37037037037037035, 0.3333333333333333,
            0.2962962962962963, 0.25925925925925924, 0.2222222222222222,
            0.18518518518518517, 0.14814814814814814, 0.1111111111111111,
            0.07407407407407407, 0.037037037037037035, -0.0
        ], [1.0, 0.5, -0.0]
    ]

    assert norm_configs == expected_config
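
Each grid above is evenly spaced between 1.0 and 0.0 (the trailing -0.0 is a floating-point artifact of that spacing). A sketch of the apparent mapping; the formula is inferred from the values above, not taken from ConfigNormalizer's source:

# n grid points from 1.0 down to 0.0 in steps of 1/(n - 1).
n = 40  # size of the first grid in expected_config
grid = [1.0 - i / (n - 1) for i in range(n)]
assert abs(grid[1] - 0.9743589743589743) < 1e-12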
Example No. 8
def test_gaussian_model_get_best_config_value(self):
    training_data = TrainingData()
    config_set = UniversalConfigSet(4, 26544)
    model = GaussianModel(config_set, training_data)
    training_sample_1 = {
        "spark.executor.memory": 11945,
        "spark.sql.shuffle.partitions": 200,
        "spark.executor.cores": 2,
        "spark.driver.memory": 1024 * 4,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    training_sample_2 = {
        "spark.executor.memory": 5972,
        "spark.sql.shuffle.partitions": 300,
        "spark.executor.cores": 1,
        "spark.driver.memory": 1024 * 2,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    training_sample_3 = {
        "spark.executor.memory": 11945,
        "spark.sql.shuffle.partitions": 460,
        "spark.executor.cores": 2,
        "spark.driver.memory": 1024 * 4,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    training_sample_4 = {
        "spark.executor.memory": 10068,
        "spark.sql.shuffle.partitions": 1660,
        "spark.executor.cores": 1,
        "spark.driver.memory": 1024,
        "spark.sql.autoBroadcastJoinThreshold": 10,
        "spark.sql.statistics.fallBackToHdfs": 0
    }
    model.add_sample_to_train_data(training_sample_1, 131)
    model.add_sample_to_train_data(training_sample_2, 143)
    model.add_sample_to_train_data(training_sample_3, 155)
    model.add_sample_to_train_data(training_sample_4, 343)
    model.train()
    # Smoke test: training succeeds and a best config can be produced.
    best_config = model.get_best_config()
    assert best_config is not None
Example No. 9
def test_universal_config_set_exception(num_cores, memory):
    # Excerpt only: in the full source this is presumably parametrized
    # with invalid (num_cores, memory) pairs and wrapped so the expected
    # exception is asserted (e.g. via pytest.raises).
    UniversalConfigSet(num_cores, memory)
Example No. 10
def test_universal_config_set(num_cores, memory, param_list):
    univ_config = UniversalConfigSet(num_cores, memory)
    assert univ_config.get_params() == param_list
Example No. 11
def config_set():
    # Presumably registered as a pytest fixture in the full source; the
    # decorator is not shown in this excerpt.
    return UniversalConfigSet(4, 26544)
Example No. 12
def gaussian_model(training_data):
    config_set = UniversalConfigSet(10, 1024 * 10)
    model = GaussianModel(config_set, training_data)
    model.train()
    return model
Example No. 13
def gaussian_model():
    training_data = TrainingData()
    config_set = UniversalConfigSet(10, 1024 * 10)
    yield GaussianModel(config_set, training_data)
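
A sketch of how a yield-style fixture like this is consumed; the @pytest.fixture registration and the test below are assumptions, not part of the excerpt:

def test_fixture_provides_model(gaussian_model):
    # pytest injects the model by matching this parameter name to the
    # fixture function's name.
    assert gaussian_model is not None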