def generate_mdp(mdp_type):
    if mdp_type == 'tunnel':
        return Mdp.ChainsTunnelMDP(mdpcfg.TunnelMDPConfig())
    if mdp_type == 'star':
        return Mdp.StarMDP(mdpcfg.StarMDPConfig())
    if mdp_type == 'clique':
        return Mdp.CliquesMDP(mdpcfg.CliqueMDPConfig())
    if mdp_type == 'cliff':
        return Mdp.CliffWalker(mdpcfg.CliffMDPConfig())
    if mdp_type == 'directed':
        return Mdp.DirectedTreeMDP(mdpcfg.DirectedTreeMDPConfig())
    raise NotImplementedError()
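# Illustrative usage sketch (not from the original file): constructing one of
# the benchmark MDPs by name. It assumes the `Mdp` and `mdpcfg` modules
# referenced above are already imported in this script.
mdp = generate_mdp('tunnel')  # -> an Mdp.ChainsTunnelMDP instance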
CONF_FILE = TIRAMOLA_DIR + "complex_20k/mdp_small.json"

#############################################################

PRINT = len(sys.argv) > 1 and sys.argv[1] == "-p"
if PRINT:
    num_tests = 1

conf = ModelConf(CONF_FILE)
assert conf.get_model_type() == MDP, "Wrong model type in MDP example"

total_reward_results = []
for i in range(num_tests):
    scenario = ComplexScenario(training_steps, load_period, 10, MIN_VMS, MAX_VMS)
    model = MDPModel(conf.get_model_conf())
    model.set_state(scenario.get_current_measurements())
    total_reward = 0
    for time in range(training_steps + eval_steps):
        # epsilon-greedy: explore randomly during training, then follow the policy
        if random.uniform(0, 1) < epsilon and time < training_steps:
            action = random.choice(model.get_legal_actions())
        else:
            action = model.suggest_action()
        reward = scenario.execute_action(action)
        meas = scenario.get_current_measurements()
        model.update(action, meas, reward)
        if time % 500 == 1:
    reward = served_load - 3 * vms
    return reward


def average(l):
    return sum(l) / len(l)


conf = ModelConf(CONF_FILE)
assert conf.get_model_type() == MDP, "Wrong model type in MDP example"

total_reward_results = []
for i in range(num_tests):
    model = MDPModel(conf.get_model_conf())
    m = get_next_measurements({NUMBER_OF_VMS: 10}, (NO_OP, 0), 0)
    model.set_state(m)
    total_reward = 0
    for time in range(training_steps + eval_steps):
        # epsilon-greedy: explore randomly during training, then follow the policy
        if random.uniform(0, 1) < epsilon and time < training_steps:
            action = random.choice(model.get_legal_actions())
        else:
            action = model.suggest_action()
        m = get_next_measurements(m, action, time)
        reward = get_reward(m, action)
        model.update(action, m, reward)
TIRAMOLA_DIR = "/home/kostis/git/tiramola/"

import sys
sys.path.append(TIRAMOLA_DIR)

from Configuration import ModelConf
from Constants import *
from MDPModel import MDPModel

CONFIGURATION_FILE = TIRAMOLA_DIR + "examples/mdp_basic/mdp_basic.json"

conf = ModelConf(CONFIGURATION_FILE)
assert conf.get_model_type() == MDP, "Wrong model type in MDP example"

model_conf = conf.get_model_conf()
model = MDPModel(model_conf)

m1 = {TOTAL_LOAD: 0.2534, NUMBER_OF_VMS: 7, TOTAL_LATENCY: 0.156}
m2 = {TOTAL_LOAD: 1.5523, NUMBER_OF_VMS: 5, TOTAL_LATENCY: 0.524}
m3 = {TOTAL_LOAD: 1.6605, NUMBER_OF_VMS: 6, TOTAL_LATENCY: 0.100}
m4 = {TOTAL_LOAD: 0.1005, NUMBER_OF_VMS: 6, TOTAL_LATENCY: 0.250}

model.set_prioritized_sweeping(0.01, 100)
model.set_state(m1)

action = (REMOVE_VMS, 1)
model.update(action, m2, 2.5)
model.update(action, m3, 1.4)
model.update(action, m4, 4.2)

model.print_model(detailed=True)
#model.value_iteration(0.01)
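# Illustrative extension (an assumption, not part of the original script):
# after the prioritized-sweeping updates above, the model can propose an
# action for its current state; suggest_action() is the same call the
# training loops in the other examples use.
suggested = model.suggest_action()
print("Suggested action after updates:", suggested)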
def get_reward(measurements, time, action):
    load = get_load(time)
    vms = measurements[NUMBER_OF_VMS]
    # each VM serves up to 10 load units and costs 3 per step;
    # scaling actions carry an extra one-off cost
    reward = min(10 * vms, load) - 3 * vms
    if action[0] == ADD_VMS:
        reward -= 2 * action[1]
    elif action[0] == REMOVE_VMS:
        reward -= action[1]
    return reward


conf = ModelConf(TIRAMOLA_DIR + "examples/mdp_2d_ps/mdp_2d_ps.json")
assert conf.get_model_type() == MDP, "Wrong model type in MDP example"
model = MDPModel(conf.get_model_conf())

max_steps = 5000
vi_step = 2500
epsilon = 0.3
time = 0

measurements = {NUMBER_OF_VMS: 1, TOTAL_LOAD: get_load(0)}
model.set_state(measurements)

while time < max_steps:
    time += 1
    # epsilon-greedy: explore randomly until vi_step, greedy afterwards
    if random.uniform(0, 1) < epsilon and time < vi_step:
        action = random.choice(model.get_legal_actions())
    else:
        action = model.suggest_action()