def fql_get_action(self, clients):
    """Select the next fuzzy Q-learning action and update the Q-table.

    The firing strength of every fuzzy rule is obtained from
    ``settings.FLS`` for the fuzzy variables derived from ``clients``.
    For each rule with a positive firing strength an action index is
    picked through ``fql_<settings.EXPLORATION>``; the selected actions
    and their Q-values are then combined as firing-weighted averages.
    On every call after the first, the Q-table entries selected on the
    previous call are adjusted by the resulting temporal-difference error.

    Args:
        clients: forwarded to ``get_fuzzy_variables`` and ``get_reward``.

    Returns:
        The sum of the selected actions weighted by their firing
        strengths, divided by the sum of all firing strengths.

    Note:
        Every average is divided by the sum of all firing strengths, so a
        call in which that sum is zero divides by zero.
    """
    others_below, serverload, client_load = self.get_fuzzy_variables(clients)
    firing = settings.FLS.get_rules_firing(others_below, serverload, client_load)

    # Accumulate the normalisation constant left to right, starting from 0.
    total_firing = 0
    for strength in firing:
        total_firing = total_firing + strength

    # Only rules that actually fired take part in the action selection.
    fired_rules = [rule for rule, strength in enumerate(firing) if strength > 0.0]

    chosen = {}  # rule index -> action index selected for that rule
    weighted_action = 0
    weighted_q = 0
    weighted_v = 0
    for rule in fired_rules:
        policy = getattr(self, f"fql_{settings.EXPLORATION}")
        chosen[rule] = policy(rule)
        Mlog.DEBUG("FIRING:", firing)
        Mlog.DEBUG("INDEX:", rule)
        Mlog.DEBUG("action_index:", chosen[rule], self.actions)
        Mlog.DEBUG("INDEXES:", len(firing), len(self.actions), chosen[rule])

        strength = firing[rule]
        q_row = self.q_table[rule]
        weighted_action += self.actions[chosen[rule]] * strength
        weighted_q += q_row[chosen[rule]] * strength
        # Value estimate: Q-value of the action the policy returns when
        # exploration is switched off.
        weighted_v += q_row[policy(rule, force_max=True)] * strength

    weighted_action /= total_firing
    weighted_q /= total_firing
    weighted_v /= total_firing
    self.fql_q_function = weighted_q
    self.fql_v_function = weighted_v

    if self.first:
        # There is no previous action to learn from on the very first call.
        self.first = False
    else:
        reward = self.get_reward(clients, self.old_action)
        # With "fsl" in the strategy name the target bootstraps from the
        # Q-value of the actions just selected, otherwise from the
        # force_max value estimate.
        if settings.STRATEGY.count("fsl"):
            bootstrap = self.fql_q_function
        else:
            bootstrap = self.fql_v_function
        delta_q = reward + self.GAMMA * bootstrap - self.old_fql_q_function

        # Must stay causal: the entries updated are the ones selected on
        # the previous call, not the ones chosen just now.
        # NOTE(review): the update is weighted by the *current* firing
        # strengths - confirm this is intended rather than the previous
        # call's strengths.
        for rule, action in self.old_index_action.items():
            self.q_table[rule][action] += delta_q * firing[rule]

    # Remember this step so the next call can learn from it.
    self.old_fql_q_function = weighted_q
    self.old_index_action = chosen
    self.old_action = weighted_action
    return weighted_action
def fql_eep(self, index, force_max=False):
    """Exploration/exploitation choice of an action index for one rule.

    Args:
        index: row of ``self.q_table`` to choose from.
        force_max: when true, exploration is disabled and the index of
            the largest Q-value is always returned.

    Returns:
        A uniformly drawn index into the row when exploring (a draw from
        ``[0, 1]`` fell below ``self.EPSILON``), otherwise the
        ``np.argmax`` of the row.
    """
    q_row = self.q_table[index]

    # The draw happens on every call, even with force_max set, so the
    # amount of randomness consumed does not depend on force_max.
    roll = random.uniform(0, 1)
    if not roll < self.EPSILON or force_max:
        greedy = np.argmax(q_row)
        Mlog.DEBUG("Max action index: ", greedy)
        return greedy

    return random.randint(0, len(q_row) - 1)
def sum_rate(self, num):
    """Add ``num`` to the current rate and push the result to the meter.

    The increment is logged first, coloured with ``Bcolors.OKBLUE`` when
    ``num`` is positive and ``Bcolors.FAIL`` otherwise.

    Args:
        num: amount added to ``int(self.get_rate())``.
    """
    colour = Bcolors.OKBLUE if num > 0 else Bcolors.FAIL
    Mlog.DEBUG('INCREASE BY: ', Bcolors.change(colour, num))

    current_rate = int(self.get_rate())
    updated_rate = current_rate + num
    # NOTE: a guard comparing ``new_rate * 1000`` against ``self.bw`` was
    # sketched here at some point but is disabled; the meter is always
    # updated.
    self.rapi.change_meter(self.dpid, self.id, updated_rate)
def choose_action(self):
    """Pick an action for the current state based on the Q-table.

    Refreshes ``self.state`` from ``get_current_state`` and inspects the
    Q-table row at ``self.state - 1``. While any entry of that row is
    still ``0`` the choice is delegated to ``sample_action``; otherwise
    the index of the largest Q-value in the row is returned.

    Returns:
        The result of ``sample_action()`` when the row contains a ``0``,
        otherwise the ``np.argmax`` index of the row.
    """
    self.state = self.get_current_state()
    row = self.q_table[self.state - 1]
    Mlog.DEBUG([f"{i}:{q}" for i, q in enumerate(row)])

    if 0 in row:
        return self.sample_action()

    # No entry equals 0 at this point, so the largest entry is necessarily
    # non-zero: the former "is the best Q-value truthy?" re-check could
    # never fail and its sample_action() fallback was unreachable.
    return np.argmax(row)
def fql_softmax(self, index, force_max=False):
    """Softmax exploration/exploitation choice of an action for one rule.

    Each action of row ``index`` of ``self.q_table`` is drawn with
    probability proportional to ``exp(q / self.TEMPERATURE)``.

    Args:
        index: row of ``self.q_table`` to choose from.
        force_max: when true, skip sampling and return the index of the
            largest Q-value.

    Returns:
        The sampled action index (``int``), or the ``np.argmax`` of the
        row when ``force_max`` is set.
    """
    actions = self.q_table[index]

    if force_max:
        best = np.argmax(actions)
        Mlog.DEBUG("Max action index: ", best)
        return best

    # Shift by the largest scaled value before exponentiating. This leaves
    # the probabilities unchanged mathematically but keeps every exponent
    # <= 0, so math.exp cannot raise OverflowError for large Q-values or
    # small temperatures.
    scaled = [q / self.TEMPERATURE for q in actions]
    peak = max(scaled)
    weights = [math.exp(value - peak) for value in scaled]
    total = sum(weights)  # >= 1.0 because the peak contributes exp(0)

    # Walk the cumulative distribution once. Actions whose weight
    # underflowed to zero are skipped explicitly so they can never be
    # returned; `chosen` ends on the last drawable action if rounding
    # leaves the cumulative sum marginally below the roll.
    roll = random.random()
    cumulative = 0.0
    chosen = 0
    for action, weight in enumerate(weights):
        if weight <= 0.0:
            continue
        chosen = action
        cumulative += weight / total
        if roll < cumulative:
            break
    return chosen
def do_fuzzy_action(self, value):
    """Convert a fuzzy controller output into a rate change for the client.

    ``value`` is divided by 100, multiplied by
    ``0.3 * settings.MAX_SERVER_LOAD``, divided by 1000 and truncated with
    ``int``; the result is logged and handed to ``self.client.sum_rate``.

    Args:
        value: controller output; presumably a percentage given the
            division by 100 - TODO confirm against the caller.
    """
    share = value / 100
    raw_step = share * 0.3 * settings.MAX_SERVER_LOAD / 1000
    increase_by = int(raw_step)
    Mlog.DEBUG("DO FUZZY ACTION, INCREASE BY =", increase_by)
    self.client.sum_rate(increase_by)