# NOTE(review): fragment — this chunk opens mid-way through a map_models list
# whose opening bracket is in an earlier, unseen chunk.
# Map models: learn the correction from the authoritative model's prediction
# (1-D input for the GP kernel, so presumably a single mapped feature — confirm).
LIN(),
POL(degree=4),
LINSVR(random_state=seed),
GPyR(GPy.kern.Matern52(input_dim=1, variance=1.0, lengthscale=1.0)),
]
# Direct-modeling baselines: learn the unknown function straight from samples.
direct_models = [
    LIN(),
    POL(degree=4),
    LINSVR(random_state=seed),
    GPyR(GPy.kern.Matern52(input_dim=2, variance=1.0, lengthscale=1.0)),
    GPyMT(GPy.kern.Matern52(input_dim=2, variance=1.0, lengthscale=1.0)),
    CHOR(Chorus_config),
]
# Difference-style (0:1) model map built on top of the authoritative model.
fmap = ModelMapMap.ModelMapMapDifference0_1(auth_model, map_models)
# ----------------------------------------------------------------------------- #
# TPC-C Scenario 1 - Predicting Performance with Increased CPU Resources        #
# ----------------------------------------------------------------------------- #
# NOTE(review): this constructor call is truncated here — the remaining keyword
# arguments continue in a later, unseen chunk.
experiment_tpcc_1 = ModelMapExperiment.ModelMapExperiment(
    authoritative_dataset_filename = authoritative_dataset_filename,
    domain_columns = domain_columns,
    codomain_columns = codomain_columns,
    num_runs = num_runs,
    sampling_budgets = sampling_budgets,
    authoritative_models = authoritative_models,
    authoritative_model_dataset_range = authoritative_model_dataset_range,
# Scenario 2a uses only a linear map model (LINM) and no direct baselines.
map_models = [
    LINM(),
]
direct_models = []  # no direct-modeling baselines in this configuration
# Composition-style (0:1) map over the authoritative model; active learning
# and model selection are both disabled for this scenario.
Application_Experiment_2a = ModelMapExperiment.ModelMapExperiment(
    authoritative_dataset_filename=authoritative_dataset_filename,
    domain_columns=domain_columns,
    codomain_columns=codomain_columns,
    num_runs=num_runs,
    sampling_budgets=sampling_budgets,
    authoritative_models=auth_model,
    authoritative_model_dataset_range=authoritative_model_dataset_range,
    unknown_function_dataset_range=unknown_function_dataset_range,
    map=ModelMapMap.ModelMapMapComposition0_1(auth_model, map_models),
    direct_modeling_methods=direct_models,
    active_learning=False,
    model_selection=False,
    logger=logger,
    title="APP-Scenario2a-")
# Re-bind map_models for the next scenario (3-D input for the GP kernel).
map_models = [
    LIN(),
    POL(degree=4),
    LINSVR(random_state=seed),
    GPyR(GPy.kern.Matern52(input_dim=3, variance=1.0, lengthscale=1.0)),
]
# NOTE(review): fragment — this list is truncated here and continues in a
# later, unseen chunk.
direct_models = [
    LIN(),
# Scenario 3a uses only a linear map model (LINM) and no direct baselines.
map_models = [
    LINM(),
]
direct_models = []  # no direct-modeling baselines in this configuration
# Composition-style (0:1) map over the authoritative model; active learning
# and model selection are both disabled for this scenario.
Application_Experiment_3a = ModelMapExperiment.ModelMapExperiment(
    authoritative_dataset_filename=authoritative_dataset_filename,
    domain_columns=domain_columns,
    codomain_columns=codomain_columns,
    num_runs=num_runs,
    sampling_budgets=sampling_budgets,
    authoritative_models=auth_model,
    authoritative_model_dataset_range=authoritative_model_dataset_range,
    unknown_function_dataset_range=unknown_function_dataset_range,
    map=ModelMapMap.ModelMapMapComposition0_1(auth_model, map_models),
    direct_modeling_methods=direct_models,
    active_learning=False,
    model_selection=False,
    logger=logger,
    title="APP-Scenario3a-")
# Re-bind map_models for the next scenario (3-D input for the GP kernel).
map_models = [
    LIN(),
    POL(degree=4),
    LINSVR(random_state=seed),
    GPyR(GPy.kern.Matern52(input_dim=3, variance=1.0, lengthscale=1.0)),
]
# NOTE(review): fragment — this list is truncated here and continues in a
# later, unseen chunk.
direct_models = [
    LIN(),
# NOTE(review): fragment — this chunk opens mid-way through a model list whose
# opening bracket is in an earlier, unseen chunk.
POL(degree=4),
LINSVR(random_state=seed),
GPyR(GPy.kern.Matern52(input_dim=1, variance=1.0, lengthscale=1.0)),
]
# NOTE(review): everywhere else in this file the constructor is qualified as
# ModelMapExperiment.ModelMapExperiment(...); the unqualified call below looks
# like a missing module qualifier — confirm against the file's imports.
# Also, map_models_a is not defined in this visible fragment; presumably it is
# bound in an earlier chunk.
experiment_tpcc_4a = ModelMapExperiment(
    authoritative_dataset_filename=authoritative_dataset_filename,
    unknown_dataset_filename=unknown_dataset_filename,
    domain_columns=domain_columns,
    codomain_columns=codomain_columns,
    num_runs=num_runs,
    sampling_budgets=sampling_budgets,
    authoritative_models=authoritative_models,
    authoritative_model_dataset_range=authoritative_model_dataset_range,
    unknown_function_dataset_range=unknown_function_dataset_range,
    map=ModelMapMap.ModelMapMapComposition0_1(auth_model, map_models_a),
    direct_modeling_methods=direct_models,
    active_learning=False,
    model_selection=False,
    log2_columns=log2_columns,
    normalization='N',
    logger=logger,
    title="TPC-C-Scenario4a-")
# Second map-model set for the "b" variant of Scenario 4 (2-D GP kernel input).
map_models_b = [
    LIN(),
    POL(degree=4),
    LINSVR(random_state=seed),
    GPyR(GPy.kern.Matern52(input_dim=2, variance=1.0, lengthscale=1.0)),
]
# NOTE(review): fragment — this chunk opens mid-way through a map_models list
# whose opening bracket is in an earlier, unseen chunk.
LIN(),
POL(degree=4),
LINSVR(random_state=seed),
GPyR(GPy.kern.Matern52(input_dim=3, variance=1.0, lengthscale=1.0)),
]
# Direct-modeling baselines over the full 5-D filesystem configuration space.
direct_models = [
    LIN(),
    POL(degree=4),
    LINSVR(random_state=seed),
    GPyR(GPy.kern.Matern52(input_dim=5, variance=1.0, lengthscale=1.0)),
    GPyMT(GPy.kern.Matern52(input_dim=5, variance=1.0, lengthscale=1.0)),
    CHOR(Chorus_config),
]
# Difference-style map of class (L>0:1); the listed columns are the domain
# dimensions carried into the map ("inode_size", "bg_count").
fmap = ModelMapMap.ModelMapMapDifferenceL_1(auth_model, map_models, ["inode_size", "bg_count"])
# ------------------------------------------------------------------------------------------------- #
# Filesystem Scenario 3 - Modeling the effects of disk type (SATA -> 500SAS)                        #
#                         and I/O scheduler (CFQ -> Deadline)                                       #
# ------------------------------------------------------------------------------------------------- #
# NOTE(review): this constructor call is truncated here — the remaining keyword
# arguments continue in a later, unseen chunk.
Filesystem_Experiment_3 = ModelMapExperiment.ModelMapExperiment(
    authoritative_dataset_filename=authoritative_dataset_filename,
    domain_columns=domain_columns,
    codomain_columns=codomain_columns,
    num_runs=num_runs,
    sampling_budgets=sampling_budgets,
    authoritative_models=auth_model,
    authoritative_model_dataset_range=authoritative_model_dataset_range,
# Scenario 1a uses only a linear map model (LINM) and no direct baselines.
map_models = [
    LINM(),
]
direct_models = []  # no direct-modeling baselines in this configuration
# Composition-style (0:1) map over the authoritative model; active learning
# and model selection are both disabled for this scenario.
Application_Experiment_1a = ModelMapExperiment.ModelMapExperiment(
    authoritative_dataset_filename=authoritative_dataset_filename,
    domain_columns=domain_columns,
    codomain_columns=codomain_columns,
    num_runs=num_runs,
    sampling_budgets=sampling_budgets,
    authoritative_models=auth_model,
    authoritative_model_dataset_range=authoritative_model_dataset_range,
    unknown_function_dataset_range=unknown_function_dataset_range,
    map=ModelMapMap.ModelMapMapComposition0_1(auth_model, map_models),
    direct_modeling_methods=direct_models,
    active_learning=False,
    model_selection=False,
    logger=logger,
    title="APP-Scenario1a-" )
# Re-bind map_models for the next scenario (2-D input for the GP kernel).
map_models = [
    LIN(),
    POL(degree=4),
    LINSVR(random_state=seed),
    GPyR(GPy.kern.Matern52(input_dim=2, variance=1.0, lengthscale=1.0)),
]
# NOTE(review): fragment — this list is truncated here and continues in a
# later, unseen chunk.
direct_models = [
# NOTE(review): fragment — this chunk opens mid-way through a map_models list
# whose opening bracket is in an earlier, unseen chunk.
LIN(),
POL(degree=4),
LINSVR(random_state=seed),
GPyR(GPy.kern.Matern52(input_dim=5, variance=1.0, lengthscale=1.0)),
]
# Direct-modeling baselines over the full 7-D filesystem configuration space.
direct_models = [
    LIN(),
    POL(degree=4),
    LINSVR(random_state=seed),
    GPyR(GPy.kern.Matern52(input_dim=7, variance=1.0, lengthscale=1.0)),
    GPyMT(GPy.kern.Matern52(input_dim=7, variance=1.0, lengthscale=1.0)),
    CHOR(Chorus_config),
]
# Composition-style map of class (L>0:1); the listed columns are the domain
# dimensions carried into the map.
fmap = ModelMapMap.ModelMapMapCompositionL_1(auth_model, map_models, ["inode_size", "bg_count", "disk_type", "io_scheduler"])
# ------------------------------------------------------------------------------------------------- #
# Filesystem Scenario 5 - Modeling the effects changing filesystem (ext3 -> ext4)                   #
# ------------------------------------------------------------------------------------------------- #
# NOTE(review): this constructor call is truncated here — the remaining keyword
# arguments continue in a later, unseen chunk.
Filesystem_Experiment_5 = ModelMapExperiment.ModelMapExperiment(
    authoritative_dataset_filename=authoritative_dataset_filename,
    unknown_dataset_filename=unknown_dataset_filename,
    domain_columns=domain_columns,
    codomain_columns=codomain_columns,
    num_runs=num_runs,
    sampling_budgets=sampling_budgets,
    authoritative_models=auth_model,
def experiment_func(iteration):
    """Build and run the TPC-C Scenario 5 VM-packing experiment (10WH -> 20WH).

    Configures logging, the Chorus ensemble, the regressor classes, the
    TPC-C dataset ranges and model lists, then constructs a
    ModelMapVMPackingExperiment and runs it.

    Parameters
    ----------
    iteration : int
        Passed through as ``num_runs`` to the experiment.
    """
    # --- Logging setup ------------------------------------------------------
    logger = logging.getLogger("ModelMap")
    logger.setLevel(logging.DEBUG)
    # FIX: guard handler attachment. The "ModelMap" logger is process-global,
    # so calling experiment_func() repeatedly used to attach a new file and
    # console handler each time, duplicating every log record.
    if not logger.handlers:
        # file handler which logs even debug messages
        file_logger = logging.FileHandler('modelmap.log')
        # console handler (same DEBUG level as the file handler)
        console_logger = logging.StreamHandler()
        file_logger.setLevel(logging.DEBUG)
        console_logger.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        file_logger.setFormatter(formatter)
        console_logger.setFormatter(formatter)
        logger.addHandler(file_logger)
        logger.addHandler(console_logger)

    # --- Chorus regressor configuration -------------------------------------
    fit_dimension = 0
    # Chorus G_RGN model
    G_RGN_config = {"fit_column": fit_dimension + 1,
                    "type": "poly",
                    "order": "4"}
    # Chorus B_SVM global model
    B_SVM_config = {"USE_AVG": "true"}
    # Chorus B_LIN global region split model
    B_LIN_config = {"max_regions": 2,
                    "specdb": {0: (0.0, 1.0, 0), 1: (0.0, 1.0, 0), 2: (0.0, 1.0, 0)}}
    # Full Chorus3 ensemble model
    Chorus_config = {
        "models": {
            "Chorus_G_RGN": G_RGN_config,
            "Chorus_B_CNST": dict(),
            "Chorus_B_LIN": B_LIN_config,
            "Chorus_B_SVM": B_SVM_config,
        },
        "max_regions": 2,
        "n_way_cross": 5,
        "specdb": {0: (0.0, 1.0, 0), 1: (0.0, 1.0, 0), 2: (0.0, 1.0, 0)},
    }

    # --- Regressor classes ---------------------------------------------------
    seed = 1
    # Clone each regressor class (same bases, copied dict) so that setting a
    # display `label` does not mutate the imported class.
    LIN = type('LIN', LinearRegression.__bases__, dict(LinearRegression.__dict__))
    LIN.label = "Linear regression"
    POL = type('POL', PolynomialRegressor.__bases__, dict(PolynomialRegressor.__dict__))
    POL.label = "Polynomial regression"
    # FIX: the clone was previously created as type('POL', ...), which gave the
    # Linear SVR class __name__ == 'POL'; use its own name.
    LINSVR = type('LINSVR', LinearSVR.__bases__, dict(LinearSVR.__dict__))
    LINSVR.label = "Linear SVR"
    GPyR = type('GPyR', GPyRegressor.__bases__, dict(GPyRegressor.__dict__))
    GPyR.label = "Gaussian Process"
    GPyMT = type('GPyMT', GPyRegressorMultiTask.__bases__, dict(GPyRegressorMultiTask.__dict__))
    GPyMT.label = "Multi-Task Gaussian Process*"
    CHOR = ChorusIncrementalRegressor
    CHOR.label = "Chorus*"

    # --- Experiment configuration --------------------------------------------
    # Authoritative model
    auth_model = DatabaseRegressor()
    # Dataset files (authoritative: 10 warehouses; unknown: 20 warehouses)
    authoritative_dataset_filename = "Datasets/tpcc_blade_3min_10WH_20170725.csv"
    unknown_dataset_filename = "Datasets/tpcc_blade_3min_20WH.csv"
    # Columns containing the function domain
    domain_columns = ["CPU", "disk_bandwidth_quanta", "buffer_pool"]
    # Column containing the function codomain
    codomain_columns = "TPMC"
    # Sampling budgets (numbers of samples) to evaluate
    sampling_budgets = [5, 10]
    # Value ranges for the authoritative model's dataset
    authoritative_model_dataset_range = {"CPU": [0, 1],
                                         "disk_bandwidth_quanta": [1, 48],
                                         "buffer_pool": [1, 1024]}
    # Value ranges for the unknown function's dataset
    unknown_function_dataset_range = {"CPU": [0, 1],
                                      "disk_bandwidth_quanta": [1, 48],
                                      "buffer_pool": [1, 1024]}
    # Columns to be log2-transformed by the experiment
    log2_columns = ["CPU", "disk_bandwidth_quanta", "buffer_pool"]

    # Models that learn the map from the authoritative model's predictions.
    map_models = [
        LIN(),
        POL(degree=4),
        LINSVR(random_state=seed),
        GPyR(GPy.kern.Matern52(input_dim=2, variance=1.0, lengthscale=1.0)),
    ]
    # Baselines that model the unknown function directly from samples.
    direct_models = [
        LIN(),
        POL(degree=4),
        LINSVR(random_state=seed),
        GPyR(GPy.kern.Matern52(input_dim=3, variance=1.0, lengthscale=1.0)),
        GPyMT(GPy.kern.Matern52(input_dim=3, variance=1.0, lengthscale=1.0)),
        CHOR(Chorus_config),
    ]
    # Difference-style map of class (L>0:1) over "disk_bandwidth_quanta".
    fmap = ModelMapMap.ModelMapMapDifferenceL_1(auth_model, map_models, ["disk_bandwidth_quanta"])

    # ----------------------------------------------------------------------- #
    # TPC-C Scenario 5 - Modeling VM packing optimization (10WH -> 20WH)      #
    # ----------------------------------------------------------------------- #
    experiment = ModelMapVMPackingExperiment.ModelMapVMPackingExperiment(
        authoritative_dataset_filename=authoritative_dataset_filename,
        unknown_dataset_filename=unknown_dataset_filename,
        domain_columns=domain_columns,
        codomain_columns=codomain_columns,
        num_runs=iteration,
        sampling_budgets=sampling_budgets,
        authoritative_models=auth_model,
        authoritative_model_dataset_range=authoritative_model_dataset_range,
        unknown_function_dataset_range=unknown_function_dataset_range,
        map=fmap,
        direct_modeling_methods=direct_models,
        log2_columns=log2_columns,
        logger=logger,
        title="TPC-C-Scenario5-")

    # Run the experiment
    experiment.run()