def _generate_id(self):
    """Build a slave id of the form ``<host>-<digest prefix>``.

    The suffix is the first 8 characters of ``Encrypt.md5`` applied to the
    concatenation of host, ip and the current timestamp.

    :return: slave_id
    """
    hostname = Metric.get_host()
    address = Metric.get_ip()
    # The timestamp makes successive ids differ even on the same host/ip.
    seed = hostname + address + str(time.time())
    digest_prefix = Encrypt.md5(seed)[:8]
    return hostname + "-" + digest_prefix
def predict_model_cold_users(self):
    """Evaluate the model on the cold-start user test set.

    Every (user, item, rating) stored in ``self.rg.testColdUserSet_u`` is
    predicted, mapped back with ``denormalize`` using the configured
    ``min_val``/``max_val`` and passed through ``self.checkRatingBoundary``.

    :return: tuple ``(rmse, mae)`` from ``Metric.RMSE`` / ``Metric.MAE``
    """
    rows = []
    for user, rated_items in self.rg.testColdUserSet_u.items():
        for item, rating in rated_items.items():
            raw = self.predict(user, item)
            scaled = denormalize(raw, self.config.min_val, self.config.max_val)
            rows.append([user, item, rating, self.checkRatingBoundary(scaled)])
    return Metric.RMSE(rows), Metric.MAE(rows)
def _register(self):
    """Register this slave node with the master and store its status record.

    If a znode already exists under the current id, a fresh id is generated
    first. The stored value is ``str()`` of a dict holding id, type, host,
    addr and heartbeat.
    """
    slaves_root = "/jetsearch/slaves/"
    if self.zk.exists(slaves_root + self.id):
        self.id = self._generate_id()
    record = {
        "id": self.id,
        "type": self.type,
        "host": Metric.get_host(),
        "addr": Metric.get_ip(),
        "heartbeat": Metric.get_heartbeat(),
    }
    self.zk.create(slaves_root + self.id, str(record))
def extractFeatures(self, tasks):
    """Build the normalized workload feature vector for ``tasks``.

    The moment vector and the Fourier vector are each scaled to unit
    Euclidean norm, concatenated (moments first), and the concatenation is
    scaled to unit norm once more to make learning more robust. The result
    is stored on ``self.features`` and returned.

    :param tasks: tasks forwarded to ``computeMoments`` and ``computeFourier``
    :return: 1-D ``numpy.ndarray`` with the combined features
    """
    eps = 1e-15  # keeps the division defined for an all-zero vector

    def _unit(vector):
        # Explicit array conversion: the helpers may hand back plain lists.
        vector = np.asarray(vector, dtype=float)
        return vector / (np.linalg.norm(vector) + eps)

    moments = _unit(self.computeMoments(tasks))
    fourier = _unit(self.computeFourier(tasks))

    # Possible extension: add counts of weakly (>= 1/4) and strongly (>= 1/2)
    # interacting tasks to the feature vector.
    self.features = _unit(np.concatenate((moments, fourier)))
    return self.features
def valid_model(self):
    """Evaluate the model on the validation set and record the errors.

    Each prediction is mapped back with ``denormalize`` (configured
    ``min_val``/``max_val``) and passed through ``self.checkRatingBoundary``.
    The resulting RMSE and MAE are appended to ``self.iter_rmse`` and
    ``self.iter_mae`` (kept for plotting).

    :return: tuple ``(rmse, mae)``
    """
    rows = []
    for user, item, rating in self.rg.validSet():
        raw = self.predict(user, item)
        scaled = denormalize(raw, self.config.min_val, self.config.max_val)
        rows.append([user, item, rating, self.checkRatingBoundary(scaled)])

    rmse = Metric.RMSE(rows)
    mae = Metric.MAE(rows)
    # Keep the per-iteration history for plotting.
    self.iter_rmse.append(rmse)
    self.iter_mae.append(mae)
    return rmse, mae
def predict_model(self):
    """Predict ratings for the users in the test set and report the errors.

    Test entries whose user has at most ``self.config.coldUserRating``
    ratings in the training set are skipped (cold-start users are not
    scored here). When ``self.config.verbose`` is set, every prediction is
    printed together with the time it took.

    :return: tuple ``(rmse, mae)`` computed by ``Metric.RMSE`` /
        ``Metric.MAE`` over ``[user, item, rating, prediction]`` rows
    """
    res = []
    for user, item, rating in self.rg.testSet():
        # Cold-start users are not scored.
        if len(self.rg.trainSet_u[user]) <= self.config.coldUserRating:
            continue
        started = time.time()
        prediction = self.predict(user, item)
        # Elapsed wall-clock time is end minus start, so it is non-negative.
        pre_time = time.time() - started
        if self.config.verbose:
            print(user, item, rating, prediction, pre_time)
        res.append([user, item, rating, prediction])
    rmse = Metric.RMSE(res)
    mae = Metric.MAE(res)
    return rmse, mae
def predict_model(self):
    """Score the model on the test set, leaving out cold-start users.

    A test entry is used only when its user has more than
    ``self.config.coldUserRating`` ratings in the training set. Predictions
    are mapped back with ``denormalize`` and passed through
    ``self.checkRatingBoundary``. RMSE and MAE are appended to
    ``self.iter_rmse`` / ``self.iter_mae`` (kept for plotting).

    :return: tuple ``(rmse, mae)``
    """
    rows = []
    for user, item, rating in self.rg.testSet():
        known_ratings = len(self.rg.trainSet_u[user])
        if known_ratings > self.config.coldUserRating:
            raw = self.predict(user, item)
            scaled = denormalize(raw, self.config.min_val, self.config.max_val)
            rows.append([user, item, rating, self.checkRatingBoundary(scaled)])

    rmse = Metric.RMSE(rows)
    mae = Metric.MAE(rows)
    self.iter_rmse.append(rmse)
    self.iter_mae.append(mae)
    return rmse, mae
def run(self):
    """Keep writing this node's status to zookeeper until told to stop.

    While ``self.end`` is falsy, the stored record is read back, its
    ``heartbeat`` and ``task_status`` fields are refreshed, and the record
    is written again; the loop then sleeps for one second.

    :return: None
    """
    while not self.end:
        node_path = "/jetsearch/slaves/" + self.slave_id
        stored = self.zk.get(node_path)[0]
        # NOTE(review): eval() executes whatever expression is stored in the
        # znode; anyone able to write there can run code in this process.
        state = eval(str(stored))
        state['heartbeat'] = Metric.get_heartbeat()
        state['task_status'] = self.status
        self.zk.set(node_path, str(state))
        time.sleep(1)
def __init__(self, master='127.0.0.1:2181', type='spider'):
    """
    Slave-node executor.

    Connects to zookeeper, registers the node, reads the shared
    configuration from ``/jetsearch/config``, connects to redis and mongodb,
    starts the heartbeat and installs a watch on ``/jetsearch/job``.

    :param master: address of the master node (zookeeper ``host:port``)
    :param type: node category
    :return:
    """
    self.type = type
    self.job = None
    self.task = None
    self.job_status = {
        "total": 0,
        "success": 0,
        "fail": 0
    }
    self.wait_task_time = 0
    # Connect to the master's zookeeper server.
    # By default zk is 127.0.0.1:2181 in standalone mode.
    self.zk = KazooClient(hosts=master)
    self.zk.start()
    # Generate the slave id and register with the master
    # (_register may replace self.id when the host name is already taken).
    self.id = Metric.get_host()
    self._register()
    # Create the heartbeat thread.
    self.health_check = Health(self.zk, self.id)
    self.health_check.update(self.job_status)
    # Fetch the distributed configuration.
    # NOTE(review): eval() executes whatever is stored in the znode; anyone
    # able to write to /jetsearch/config can run code in this process.
    self.config = eval(self.zk.get("/jetsearch/config")[0])
    # Connect to the redis message queue ("host:port" in config["redis_url"]).
    redis_host, redis_port = self.config.get("redis_url").split(":")
    self.redis = redis.Redis(host=redis_host, port=redis_port)
    # Connect to the mongodb database ("host:port" in config["mongodb_url"]).
    mongo_host, mongo_port = self.config.get("mongodb_url").split(":")
    self.mongodb = MongoClient(host=mongo_host, port=int(mongo_port))
    # Start the heartbeat.
    self.health_check.start()
    log("[SUCCESS] slave init with id %s and type %s" % (self.id, self.type))

    # Watch for job publication: a non-empty payload becomes the current
    # job, an empty payload clears it.
    # NOTE(review): the payload is also passed to eval() — same concern as
    # for the configuration above.
    @self.zk.DataWatch("/jetsearch/job")
    def job_watch(data, stat, event):
        if data:
            self.job = eval(data)
            log("[JOB] receve job: %s" % data)
        else:
            if self.job:
                self.job = None
def predict_model(self):
    """Predict ratings for test-set users and build per-user recommendations.

    Test entries whose user has at most ``self.config.coldUserRating``
    ratings in the training set are skipped (cold-start users are not
    scored). When ``self.config.verbose`` is set, every prediction is
    printed together with the time it took.

    :return: tuple ``(rmse, mae, finalRes)``; ``finalRes`` maps every scored
        user to a list of at most five item ids, ordered from the highest
        predicted rating downwards
    """
    res = []
    for user, item, rating in self.rg.testSet():
        # Cold-start users are not scored.
        if len(self.rg.trainSet_u[user]) <= self.config.coldUserRating:
            continue
        started = time.time()
        prediction = self.predict(user, item)
        # Elapsed wall-clock time is end minus start, so it is non-negative.
        pre_time = time.time() - started
        if self.config.verbose:
            print(user, item, rating, prediction, pre_time)
        res.append([user, item, rating, prediction])

    # Group the predictions per user: user -> {item: predicted rating}.
    # Each item is ranked by its own prediction (not by the user's last row).
    scores_by_user = {}
    for user, item, _rating, prediction in res:
        scores_by_user.setdefault(user, {})[item] = prediction

    # Recommendation list: the five items with the highest predicted rating.
    finalRes = {}
    for user, item_scores in scores_by_user.items():
        ranked = sorted(item_scores.items(), key=lambda pair: pair[1], reverse=True)
        finalRes[user] = [item for item, _score in ranked[:5]]

    rmse = Metric.RMSE(res)
    mae = Metric.MAE(res)
    return rmse, mae, finalRes
def collectStat(self, ticks):
    """Write one sample of scheduler statistics, tagged with ``ticks``.

    Three lines are written: the objective value to ``self.objValFile``,
    the scheduler's alpha to ``self.alphaFile`` and the workload features to
    ``self.workloadFile``.

    :param ticks: integer tick counter used as the first column of each line
    """
    scheduler = self.scheduler
    tasks = scheduler.getAllTaks()
    alpha = scheduler.getAlpha()
    features = WorkloadFeatures(tasks).getFeatures()
    objective = Metric.objFunction(tasks, scheduler.getCurTime())

    self.objValFile.write("%d %f\n" % (ticks, objective))
    self.alphaFile.write("%d %s\n" % (ticks, str(alpha)))
    self.workloadFile.write("%d %s\n" % (ticks, str(features)))
def test_objFunction(self):
    """Check ``Metric.objFunction`` on two hand-built tasks at time 10."""
    first_io = IOList([IO(10, 5), IO(13, 7)])
    second_io = IOList([IO(1, 3), IO(2, 6), IO(5, 8), IO(6, 8)])
    # NOTE(review): both tasks carry the name "Task 1" — confirm whether the
    # second one was meant to be named differently.
    first = Task("Task 1", 0, 3, 20, first_io)
    second = Task("Task 1", 0, 5, 40, second_io)

    # (task, used cpu time, times blocked, total ready wait, times scheduled)
    bookkeeping = (
        (first, 14, 2, 4, 2),
        (second, 7, 4, 4, 2),
    )
    for task, cpu, blocked, waited, scheduled in bookkeeping:
        task.setUsedCpuTime(cpu)
        task.setTimesBlocked(blocked)
        task.setTotalReadyWait(waited)
        task.setTimesScheduled(scheduled)

    observed = Metric.objFunction([first, second], 10)
    self.assertTrue(isclose(11.1428, observed))
def computeMoments(self, tasks):
    """Compute the first ``WorkloadFeatures.NumMoment`` raw moments.

    The moments are taken over the interaction vector returned by
    ``Metric.interacMetric(tasks)`` with respect to the counting measure,
    i.e. the i-th moment is the plain average of the i-th powers. Results
    are written into ``self.moments`` (index ``i - 1`` holds moment ``i``).

    :param tasks: tasks passed to ``Metric.interacMetric``
    :return: ``self.moments``
    :raises ZeroDivisionError: if the interaction vector is empty
    """
    interactVec = Metric.interacMetric(tasks)
    # Counting measure: every entry has weight 1/n.
    n = float(len(interactVec))
    # range() iterates identically on Python 2 and 3 (xrange is 2-only).
    for order in range(1, WorkloadFeatures.NumMoment + 1):
        self.moments[order - 1] = sum(inter ** order for inter in interactVec) / n
    return self.moments
def computeFourier(self, tasks):
    """Extract the dominant non-zero frequency of the interaction vector.

    The FFT of ``Metric.interacMetric(tasks)`` is taken, the zero-frequency
    coefficient is discarded, and the position (relative to the number of
    coefficients) and magnitude of the largest remaining coefficient are
    stored in ``self.fourier``.

    :param tasks: non-empty collection of tasks
    :return: ``self.fourier`` as ``[relative peak frequency, peak magnitude]``
    """
    signal = Metric.interacMetric(tasks)
    assert len(tasks) > 0
    magnitudes = np.abs(fft(signal))
    # Frequency zero corresponds to the integral of the original signal,
    # so it is excluded from the peak search.
    magnitudes[0] = 0
    peak_index = np.argmax(magnitudes)
    peak_value = np.max(magnitudes)
    self.fourier = [peak_index / float(len(magnitudes)), peak_value]
    return self.fourier
def probe(self):
    """Advance the probing phase by one step.

    On the first call during probing, only the current alpha is installed
    on the scheduler. On later calls the tuple (workload features, alpha,
    objective value) is appended to ``self.relation`` and the next alpha is
    installed. Each overflow reported by ``self.incCurIndices()`` counts as
    one pass; after ``WorkloadProber.AlphaMult`` passes probing is switched
    off.

    :return: True only on the call that ends the probing phase, else False
    """
    if not self._isProbing:
        # Nothing to do outside the probing phase.
        return False

    tasks = self.scheduler.getAllTaks()
    current_alpha = self.getCurAlpha()

    if self.alpha is None:
        # First probe: just install the starting alpha.
        self.alpha = current_alpha
        self.scheduler.setAlpha(current_alpha)
        return False

    # Record what the previous alpha achieved on the current workload.
    workload = WorkloadFeatures(tasks)
    objective = Metric.objFunction(tasks, self.scheduler.getCurTime())
    self.relation.append((workload, current_alpha, objective))

    # Move on to the next alpha.
    wrapped = self.incCurIndices()
    next_alpha = self.getCurAlpha()
    self.scheduler.setAlpha(next_alpha)
    self.alpha = next_alpha

    if not wrapped:
        return False
    self.nPasses += 1
    if self.nPasses != WorkloadProber.AlphaMult:
        return False
    # All passes done: end the probing phase.
    self._isProbing = False
    return True
from metrics.metric import Metric

# Smoke check: call Metric.hits on a minimal pair of inputs and print the result.
# NOTE(review): presumably the first mapping is the reference data and the
# second the per-user ranked predictions — confirm against Metric.hits.
hits_args = ({0: {1: 1}}, {0: [(1, 1)]})
print(Metric.hits(*hits_args))