def _fit(self, X, y):
    """Fit one target column by cyclic coordinate updates and return beta.

    NOTE(review): the indentation of this block was lost in the source this
    was reconstructed from.  The intercept refresh is placed inside the
    inner coordinate loop here -- confirm against version control.

    Args:
        X: 2-D design matrix.  When ``self.fit_intercept`` is true, column 0
            is expected to be the bias column (``fit`` prepends it before
            delegating here), so it pairs with ``beta[0]``.
        y: 1-D target vector; ``dim(y) == 1`` is asserted.

    Returns:
        ``beta``: one coefficient per column of ``X``.
    """
    self._check(X, y)
    assert (dim(y) == 1)

    n_samples = shape(X)[0]
    beta = zeros(shape(X)[1])  # row vector
    X_T = matrix_transpose(X)
    y_flat = reshape(y, -1)
    first = 1 if self.fit_intercept else 0
    threshold = self.alpha * n_samples
    # Squared column norms do not change between sweeps; compute them once
    # instead of twice per coordinate per iteration.
    col_sq_norm = [sum(square(X_T[j])) for j in range(len(beta))]

    def _intercept():
        # Mean residual of the model without its bias term.  X already
        # contains the bias column, so beta[0] is replaced by 0 rather than
        # dropped: passing beta[1:] would pair a (p + 1)-column X with only
        # p weights.
        no_bias = [0.0] + list(beta[1:])
        return sum(minus(y_flat, dot(X, no_bias))) / n_samples

    if self.fit_intercept:
        beta[0] = _intercept()

    for _ in range(self.max_iter):
        for j in range(first, len(beta)):
            # NOTE(review): the residual is taken with the full beta
            # (coordinate j included).  A variant that zeroes coordinate j
            # first was left commented out in the original; the active
            # behaviour is kept unchanged here.
            r_j = minus(y_flat, dot(X, beta))
            arg1 = dot(X_T[j], r_j)
            if col_sq_norm[j] != 0:
                beta[j] = self._soft_thresholding_operator(
                    arg1, threshold) / col_sq_norm[j]
            else:
                # An all-zero column carries no information.
                beta[j] = 0
            if self.fit_intercept:
                beta[0] = _intercept()
    return beta
def predict(self, X):
    """Predict with distance-weighted k-nearest neighbours.

    For every query row the ``self.k`` stored rows with the smallest summed
    squared difference are selected and their targets are combined with
    weights proportional to the inverse of that value.  The weights always
    sum to 1.  If one or more selected neighbours match the query exactly
    (value 0), those neighbours share the whole weight equally and the
    others get 0.

    Args:
        X: a single query (1-D) or a sequence of queries (2-D).

    Returns:
        One prediction per query.  When ``self.shape_Y`` has length 1 the
        result is transposed and its first row returned.
    """
    result = []
    if dim(X) == 1:
        X = [X]
    for x in X:
        loss = sum(square(minus(self.X, x)), axis=1)
        index = argsort(loss)[:self.k]
        if self.verbose:
            print(index)
        ys = [self.y[i] for i in index]

        k_loss_raw = sorted(loss)[:self.k]
        exact = [l == 0 for l in k_loss_raw]
        n_exact = len([hit for hit in exact if hit])
        if n_exact:
            # 1 / 0 is undefined: let the exact matches decide the answer
            # instead of silently dropping them or (when every neighbour is
            # exact) returning the un-normalised sum of the targets.
            weights = [1.0 / n_exact if hit else 0.0 for hit in exact]
        else:
            # 1.0 keeps this a true division under Python 2 as well.
            inverse = [1.0 / l for l in k_loss_raw]
            inverse_sum = sum(inverse)
            weights = [w / float(inverse_sum) for w in inverse]

        weight_m = diag(weights)
        ys = matrix_matmul(weight_m, ys)
        result.append(sum(ys, axis=0))
    if len(self.shape_Y) == 1:
        result = matrix_transpose(result)[0]
    return result
def fit(self, X, y):
    """Fit the model by cyclic coordinate updates.

    NOTE(review): the indentation of this block was lost in the source this
    was reconstructed from.  The intercept refresh is placed inside the
    inner coordinate loop here -- confirm against version control.

    Args:
        X: 2-D feature matrix (without a bias column).
        y: 1-D target vector, or 2-D matrix with one target per column.

    Returns:
        ``self`` for 1-D and 2-D targets.  For 1-D targets ``self.beta``,
        ``self.coef_`` and (with ``fit_intercept``) ``self.intercept_`` are
        set; for 2-D targets ``self.betas`` holds one column of
        coefficients per target.  Any other target rank is ignored and
        ``None`` is returned, as before.
    """
    self._check(X, y)
    target_dim = dim(y)

    if target_dim == 1:
        raw_X = X
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])

        n_samples = shape(X)[0]
        beta = zeros(shape(X)[1])  # row vector
        X_T = matrix_transpose(X)
        y_flat = reshape(y, -1)
        first = 1 if self.fit_intercept else 0
        threshold = self.alpha * n_samples
        # Squared column norms are loop-invariant; compute them once.
        col_sq_norm = [sum(square(X_T[j])) for j in range(len(beta))]

        def _intercept():
            # Mean residual of the bias-free model on the raw features.
            return sum(minus(y_flat, dot(raw_X, beta[1:]))) / n_samples

        if self.fit_intercept:
            beta[0] = _intercept()

        for _ in range(self.max_iter):
            for j in range(first, len(beta)):
                # NOTE(review): the residual is taken with the full beta
                # (coordinate j included); the variant that zeroes
                # coordinate j was left commented out in the original and
                # the active behaviour is kept.
                r_j = minus(y_flat, dot(X, beta))
                arg1 = dot(X_T[j], r_j)
                if col_sq_norm[j] != 0:
                    beta[j] = self._soft_thresholding_operator(
                        arg1, threshold) / col_sq_norm[j]
                else:
                    beta[j] = 0
                if self.fit_intercept:
                    beta[0] = _intercept()

        if self.fit_intercept:
            self.intercept_ = beta[0]
            self.coef_ = beta[1:]
        else:
            self.coef_ = beta
        self.beta = beta
        return self

    elif target_dim == 2:
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        y_t = matrix_transpose(y)
        # One independent fit per target column.
        betas = [self._fit(X, y_t[i]) for i in range(shape(y)[1])]
        self.betas = matrix_transpose(betas)
        # Return self here too, so both branches behave the same for
        # callers that chain on the result.
        return self
def fit(self, X, y, weights=None):
    """Solve the regularised normal equations and store the result.

    Computes ``W = inv(X'X + eye * alpha * n_rows) X' y`` and stores it in
    ``self.W``; ``self.importance_`` is the row-wise sum of ``W`` (without
    the bias row when an intercept is fitted).

    Args:
        X: feature matrix, passed through ``self._check``.
        y: targets, passed through ``self._check``.
        weights: optional per-row weights; must have one entry per row.
            NOTE(review): only ``X`` is scaled by the weights, ``y`` is
            left untouched -- confirm this is the intended weighting.
    """
    X, y = self._check(X, y)
    if self.fit_intercept:
        m, n = shape(X)
        bias = ones(m, 1)
        X = hstack([bias, X])

    eye = identity_matrix(shape(X)[1])
    from linalg.matrix import diag
    # Exempt the bias from the penalty only when a bias column exists;
    # without one, entry [0][0] belongs to the first real feature and must
    # stay regularised.
    if self.fit_intercept and not self.penalty_bias:
        eye[0][0] = 0

    if weights is not None:
        assert (len(weights) == shape(X)[0])
        X = matrix_matmul(diag(weights), X)

    X_T = matrix_transpose(X)
    gram = matrix_matmul(X_T, X)
    ridge = multiply(eye, self.alpha * shape(X)[0])
    self.W = matrix_matmul(
        matrix_matmul(matrix_inverse(plus(gram, ridge)), X_T), y)

    self.importance_ = sum(self.W, axis=1)
    if self.fit_intercept:
        self.importance_ = self.importance_[1:]
def get_backpack_score(machine_number, machine_config, flavors_unique,
                       flavors_config, backpack_result):
    """Return the overall CPU and MEM utilisation of a placement.

    Args:
        machine_number: number of machine types to score.
        machine_config: per machine type, a dict with 'CPU' and 'MEM'.
        flavors_unique: flavor identifiers; position ``i`` pairs with
            ``flavors_config[i]``.
        flavors_config: per flavor, a dict with 'CPU' and 'MEM'.
        backpack_result: per machine type, a list with one dict per used
            machine mapping flavor identifier -> number of instances.

    Returns:
        ``(cpu_rate, mem_rate)``: used / provisioned resources summed over
        all machines.  A rate is 0.0 when nothing is provisioned (no machine
        used), instead of raising ZeroDivisionError.

    Raises:
        ValueError: if a placed flavor is not in ``flavors_unique``.
    """
    def _usage(em):
        # Total CPU and MEM consumed by the flavors placed on one machine.
        cpu = 0
        mem = 0
        for flavor, count in em.items():
            config = flavors_config[flavors_unique.index(flavor)]
            cpu += config['CPU'] * count
            mem += config['MEM'] * count
        return cpu, mem

    cpu_capacity = 0
    mem_capacity = 0
    cpu_used = 0
    mem_used = 0
    for i in range(machine_number):
        machines = backpack_result[i]
        cpu_capacity += len(machines) * machine_config[i]['CPU']
        mem_capacity += len(machines) * machine_config[i]['MEM']
        for em in machines:
            cpu, mem = _usage(em)
            cpu_used += cpu
            mem_used += mem

    cpu_rate = cpu_used / float(cpu_capacity) if cpu_capacity else 0.0
    mem_rate = mem_used / float(mem_capacity) if mem_capacity else 0.0
    return cpu_rate, mem_rate
def predict(self, X):
    """Return the mean target of the ``self.k`` closest stored rows per query.

    Closeness is the sum over axis 1 of the squared differences between
    ``self.X`` and the query.

    Args:
        X: a single query (1-D) or a sequence of queries.

    Returns:
        A list with one prediction (``mean`` over axis 0 of the selected
        targets) per query.
    """
    queries = [X] if dim(X) == 1 else X
    predictions = []
    for query in queries:
        distances = sum(square(minus(self.X, query)), axis=1)
        nearest = argsort(distances)[:self.k]
        if self.verbose:
            print(nearest, '/len', len(distances))
        neighbours = [self.y[i] for i in nearest]
        predictions.append(mean(neighbours, axis=0))
    return predictions
def predict(self, X):
    """Return the mean target of the ``self.k`` best-ranked stored rows.

    The ranking value per stored row is its summed squared difference to
    the query, reduced by ``self.alpha`` times the dot product of the
    standard-scaled training rows with the query's absolute deviation from
    its own mean.

    Args:
        X: a single query (1-D) or a sequence of queries.

    Returns:
        A list with one prediction (``mean`` over axis 0 of the selected
        targets) per query.
    """
    queries = [X] if dim(X) == 1 else X
    predictions = []
    for query in queries:
        distances = sum(square(minus(self.X, query)), axis=1)

        # NOTE(review): neither the import nor the scaling of self.X depends
        # on the query; they could be hoisted out of the loop if
        # standard_scaling is confirmed to be side-effect free.
        from preprocessing import standard_scaling
        scaled_train = standard_scaling(self.X, axis=0)

        # sqrt(square(.)) is the element-wise absolute deviation.
        deviation = sqrt(square(minus(query, mean(query))))
        bonus = multiply(dot(scaled_train, deviation), self.alpha)
        ranking = minus(distances, bonus)

        nearest = argsort(ranking)[:self.k]
        if self.verbose:
            print(nearest, '/len', len(ranking))
        neighbours = [self.y[i] for i in nearest]
        predictions.append(mean(neighbours, axis=0))
    return predictions
def fit(self, X, y):
    """Solve the regularised normal equations and store the result.

    Computes ``W = inv(X'X + P * alpha * n_rows) X' y`` where ``P`` is the
    identity matrix, or ``diag(self.penalty_loss)`` when that attribute is
    truthy.  Stores ``W`` in ``self.W`` and the row-wise sum of ``W`` in
    ``self.importance_`` (without the bias row when an intercept is fitted).

    Args:
        X: feature matrix, passed through ``self._check``.
        y: targets, passed through ``self._check``.
    """
    X, y = self._check(X, y)

    if self.fit_intercept:
        n_rows, _n_cols = shape(X)
        X = hstack([ones(n_rows, 1), X])

    penalty = identity_matrix(shape(X)[1])
    from linalg.matrix import diag
    if self.penalty_loss:
        # Per-coefficient penalty strengths replace the uniform identity.
        penalty = diag(self.penalty_loss)

    X_T = matrix_transpose(X)
    gram = matrix_matmul(X_T, X)
    scaled_penalty = multiply(penalty, self.alpha * shape(X)[0])
    inverse = matrix_inverse(plus(gram, scaled_penalty))
    self.W = matrix_matmul(matrix_matmul(inverse, X_T), y)

    importance = sum(self.W, axis=1)
    self.importance_ = importance[1:] if self.fit_intercept else importance
def _whatch(self, X, y):
    """Print the summed squared difference between ``self.predict(X)`` and ``y``."""
    residual = minus(self.predict(X), y)
    print(sum(square(residual)))
def predict_vm(ecs_lines, input_lines):
    """Forecast flavor demand, search for a good placement, format the output.

    The placement search runs three strategies in sequence and keeps the
    candidate with the highest mean of CPU and MEM utilisation:
    ``greedy_99_backpack`` for about 45 s (alternating thresholds 0.99 and
    0.98), ``greedy_general_backpack`` for about 5 s, then one call to
    ``random_k_times`` with ``k=500``.

    NOTE(review): the indentation of this block was lost in the source this
    was reconstructed from.  The per-machine output lines are all placed
    under the ``backpack_count[i] != 0`` check -- confirm against version
    control.

    Args:
        ecs_lines: raw training log lines, or ``None``.
        input_lines: raw task description lines, or ``None``.

    Returns:
        The output lines as a list of strings: total predicted count, one
        line per flavor, then for every machine type in use an empty line,
        a ``name count`` line and one line per machine.  ``[]`` when either
        argument is ``None``.
    """
    if input_lines is None or ecs_lines is None:
        return []

    (machine_number, machine_name, machine_config, flavors_number,
     flavors_unique, flavors_config, predict_start,
     predict_end) = parse_input_lines(input_lines)
    ecs_logs, training_start, training_end = parse_ecs_lines(
        ecs_lines, flavors_unique)

    prediction = special_check(ecs_logs, flavors_config, flavors_unique,
                               training_start, training_end, predict_start,
                               predict_end)
    # Identity test: "== None" would be delegated to the object's __eq__.
    if prediction is None:
        prediction = predict_flavors(ecs_logs, flavors_config,
                                     flavors_unique, training_start,
                                     training_end, predict_start,
                                     predict_end)

    # Positional arguments shared by every packing strategy.
    packing_args = (machine_number, machine_name, machine_config,
                    flavors_number, flavors_unique, flavors_config,
                    prediction)

    # Best candidate so far; a dict so the helper can update it without
    # relying on ``nonlocal``.
    best = {'score': None, 'count': None, 'result': None}

    def _consider(count, placement):
        # Score one candidate placement and keep it if it beats the best.
        cpu_rate, mem_rate = get_backpack_score(
            machine_number, machine_config, flavors_unique, flavors_config,
            placement)
        score = (cpu_rate + mem_rate) / 2.0
        # "not best['score']" also replaces a best score of 0, as before.
        if not best['score'] or best['score'] < score:
            best['score'] = score
            best['count'] = count
            best['result'] = placement

    # Phase 1: threshold-driven greedy packing.
    start = datetime.now()
    attempt = 0
    percent = [0.99, 0.98]
    while (datetime.now() - start).seconds < 45:
        p = percent[attempt % len(percent)]
        backpack_count, backpack_result = greedy_99_backpack(
            *packing_args, score_treadhold=p)
        _consider(backpack_count, backpack_result)
        attempt += 1

    # Phase 2: general greedy packing.
    start = datetime.now()
    while (datetime.now() - start).seconds < 5:
        backpack_count, backpack_result = greedy_general_backpack(
            *packing_args)
        _consider(backpack_count, backpack_result)

    # Phase 3: one batch of randomised packings.
    backpack_count, backpack_result = random_k_times(*packing_args, k=500)
    _consider(backpack_count, backpack_result)

    print("max_score-->", best['score'])
    backpack_count = best['count']
    backpack_result = best['result']

    # --------------build output----------------#
    result = ['{}'.format(sum(prediction))]
    for idx, amount in enumerate(prediction):
        result.append('flavor{} {}'.format(flavors_unique[idx], amount))

    def _convert_machine_string(em):
        # " flavorK V" for every flavor with a non-zero count.
        return "".join(" flavor{} {}".format(k, v)
                       for k, v in em.items() if v != 0)

    for i in range(machine_number):
        if backpack_count[i] != 0:
            result.append('')  # output '\n'
            result.append('{} {}'.format(machine_name[i], backpack_count[i]))
            for c, em in enumerate(backpack_result[i], 1):
                result.append('{}-{}{}'.format(machine_name[i], c,
                                               _convert_machine_string(em)))
    return result
def predict_flavors(ecs_logs, flavors_config, flavors_unique, training_start,
                    training_end, predict_start, predict_end):
    """Forecast the number of requests per flavor for the prediction window.

    The logs are resampled with ``frequency=1, strike=1, skip=0``.  For
    every column of the resampled matrix a ``Ridge(alpha=1,
    fit_intercept=True)`` model is fitted on the row index and evaluated on
    the following ``skip_days + predict_days`` indices; the evaluated values
    are summed, rounded, and clipped at 0.

    NOTE(review): the sum covers the skip-day rows as well as the
    prediction-window rows -- confirm this is intended.

    Args:
        ecs_logs, flavors_unique, training_start, training_end: forwarded to
            ``resampling``.
        flavors_config: not used by this function.
        predict_start, predict_end: datetime bounds of the window to predict.

    Returns:
        A list of non-negative ints, one per column of the resampled matrix.
    """
    window = predict_end - predict_start
    predict_days = window.days
    # A remainder of at least half a day counts as one more day.
    if window.seconds / float(3600) >= 12:
        predict_days += 1

    # Whole days between the end of training and the start of the window,
    # minus one.
    skip_days = (predict_start - training_end).days - 1

    sample = resampling(ecs_logs,
                        flavors_unique,
                        training_start,
                        training_end,
                        frequency=1,
                        strike=1,
                        skip=0)

    def outlier_handling(sample, method='mean', max_sigma=3):
        # Dampen values that exceed their column mean by more than
        # max_sigma * stdev.  Currently not called: the outlier-handling and
        # exponential-smoothing experiments on `sample` are disabled.
        assert (method == 'mean' or method == 'dynamic')
        std_ = stdev(sample)
        mean_ = mean(sample, axis=0)
        for r in range(shape(sample)[0]):
            for c in range(shape(sample)[1]):
                if not (sample[r][c] - mean_[c] > max_sigma * std_[c]):
                    continue
                if method == 'mean':
                    sample[r][c] = mean_[c]
                elif method == 'dynamic' and r < len(sample) / 2.0:
                    # Only the first half of the rows is pulled halfway
                    # towards the mean.
                    sample[r][c] = (mean_[c] + sample[r][c]) / 2.0
        return sample

    n_obs = len(sample)
    totals = []
    for col in range(shape(sample)[1]):
        clf = Ridge(alpha=1, fit_intercept=True)
        train_idx = reshape(list(range(n_obs)), (-1, 1))
        y = fancy(sample, None, (col, col + 1))
        test_idx = reshape(
            list(range(n_obs, n_obs + skip_days + predict_days)), (-1, 1))
        X = hstack([train_idx])
        X_test = hstack([test_idx])
        clf.fit(X, y)
        totals.append(sum(flatten(clf.predict(X_test))))

    return [int(round(total)) if total > 0 else 0 for total in totals]