def genTrainPairs(self):
    """Build (user, POI, sampled POI) training pairs from the training CSV.

    Each row of ``self.trdata_path`` supplies a user id (column 0) and two
    POI ids (columns 1 and 4).  For each of the two POIs, the first time it
    is processed for that user, ``self.nsample`` other POIs are drawn at
    random from the grid cells returned by ``getNearGridsForPOI`` and one
    pair is emitted per drawn POI.

    Returns:
        list: ``[user_index, poi_index, sampled_poi_index]`` lists, with the
        indices looked up in ``self.user_ids`` and ``self.poi_ids``.
    """
    tr_pair = []
    # Number of grid cells along each axis: the full coordinate extent
    # divided by the configured cell size.
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    user_visit = defaultdict(set)

    # ``with`` guarantees the training file is closed, even on a bad row.
    with open(self.trdata_path) as trdata_file:
        for idx, entry in enumerate(csv.reader(trdata_file), 1):
            sys.stdout.write("\rFINISHED PAIR NUM: %d. " % idx)
            sys.stdout.flush()
            uid, pid1, pid2 = int(entry[0]), int(entry[1]), int(entry[4])
            visited = user_visit[uid]

            # Both POIs of the row get the same treatment, pid1 first.
            for anchor in (pid1, pid2):
                if anchor in visited:
                    continue
                near_grids = getNearGridsForPOI(
                    self.pois_latlng[anchor], ndimx, ndimy, True)
                candidate_pois = []
                for grididx in near_grids:
                    candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

                negatives = set(candidate_pois) - set([anchor])
                # The original length guard is kept; the second condition
                # prevents random.sample from raising ValueError when the
                # de-duplicated pool is smaller than nsample.
                if (self.nsample < len(candidate_pois)
                        and self.nsample <= len(negatives)):
                    # random.sample needs a sequence on recent Python 3;
                    # list() keeps the set's iteration order.
                    for pid in random.sample(list(negatives), self.nsample):
                        tr_pair.append([
                            self.user_ids[uid],
                            self.poi_ids[anchor],
                            self.poi_ids[pid],
                        ])
                    # Only mark the POI as handled once pairs were emitted.
                    visited.add(anchor)

    print(len(tr_pair))
    return tr_pair
def recommendationNewPOI(self, submission_path):
    """Recommend nearby POIs the user has not yet visited, per test row.

    First collects, for every user, the POI ids appearing in columns 1 and 4
    of the training CSV (``self.trdata_path``).  Then, for every row of the
    test CSV (``self.tedata_path``; user id in column 0, query POI id in
    column 1), scores each POI found in the grid cells returned by
    ``getNearGridsForPOI`` that is not in the user's training set:

        dot(user_factor + query_factor, poi_factor) [+ poi_bias]

    The bias term is added only when ``self.bias_tag == True``.  The top
    ``settings["MAX_TOPK"]`` POI ids per row are stored under the row index
    and the whole mapping is handed to ``write_submission``.

    Args:
        submission_path: forwarded unchanged to ``write_submission``.
    """
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}

    # Must be sets: the candidate filter below uses set difference, which
    # raises TypeError when the right-hand operand is a list.
    user_visited = defaultdict(set)
    with open(self.trdata_path) as trdata_file:
        for entry in csv.reader(trdata_file):
            uid, pid1, pid2 = int(entry[0]), int(entry[1]), int(entry[4])
            user_visited[uid].add(pid1)
            user_visited[uid].add(pid2)

    with open(self.tedata_path) as tedata_file:
        for i, entry in enumerate(csv.reader(tedata_file)):
            uid, pid1 = int(entry[0]), int(entry[1])
            near_grids = getNearGridsForPOI(
                self.pois_latlng[pid1], ndimx, ndimy, True)
            candidate_pois = []
            for grididx in near_grids:
                candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

            pois_score = []
            for c_pid in set(candidate_pois) - user_visited[uid]:
                score = np.dot(
                    self.user_factor[self.user_ids[uid]]
                    + self.query_factor[self.poi_ids[pid1]],
                    self.poi_factor[self.poi_ids[c_pid]])
                # Explicit comparison kept so non-bool flags behave as before.
                if self.bias_tag == True:
                    score = score + self.poi_bias[self.poi_ids[c_pid]]
                pois_score.append([c_pid, score])

            result = sorted(pois_score, key=lambda x: x[1],
                            reverse=True)[:settings["MAX_TOPK"]]
            recommendation_result[i] = [pair[0] for pair in result]
            sys.stdout.write(
                "\rFINISHED RECOMMENDATION TRIPLE NUM: %d. " % (i + 1))
            sys.stdout.flush()

    write_submission(recommendation_result, submission_path)
def recommendation(self, submission_path):
    """Rank candidate POIs for every test row by user/POI factor score.

    For every row of the test CSV (``self.tedata_path``; user id in column
    0, query POI id in column 1), the POIs in the grid cells returned by
    ``getNearGridsForPOI`` are scored as

        dot(user_factor, poi_factor) [+ poi_bias]

    with the bias added only when ``self.bias_tag == True``.  Scores are
    memoised per (user, POI) so later rows for the same user reuse them.
    The top ``settings["MAX_TOPK"]`` POI ids per row are stored under the
    row index and the mapping is handed to ``write_submission``.

    Args:
        submission_path: forwarded unchanged to ``write_submission``.
    """
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}
    cache_user_poi_score = defaultdict(dict)

    # ``with`` guarantees the test file is closed, even on a bad row.
    with open(self.tedata_path) as tedata_file:
        for i, entry in enumerate(csv.reader(tedata_file)):
            uid, pid1 = int(entry[0]), int(entry[1])
            near_grids = getNearGridsForPOI(
                self.pois_latlng[pid1], ndimx, ndimy, True)
            candidate_pois = []
            for grididx in near_grids:
                candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

            user_scores = cache_user_poi_score[uid]
            already_ranked = set()
            pois_score = []
            for c_pid in candidate_pois:
                # A POI listed more than once is ranked only once per row.
                if c_pid in already_ranked:
                    continue
                already_ranked.add(c_pid)
                if c_pid not in user_scores:
                    poi_idx = self.poi_ids[c_pid]
                    score = np.dot(self.user_factor[self.user_ids[uid]],
                                   self.poi_factor[poi_idx])
                    # Explicit comparison kept so non-bool flags behave as
                    # before.
                    if self.bias_tag == True:
                        score = score + self.poi_bias[poi_idx]
                    user_scores[c_pid] = score
                # Cached scores must enter the ranking too; previously they
                # went into a list that was overwritten before use, so every
                # cache hit silently removed the POI from the result.
                pois_score.append([c_pid, user_scores[c_pid]])

            result = sorted(pois_score, key=lambda x: x[1],
                            reverse=True)[:settings["MAX_TOPK"]]
            recommendation_result[i] = [pair[0] for pair in result]
            sys.stdout.write("\rFINISHED PAIR NUM: %d. " % (i + 1))
            sys.stdout.flush()

    write_submission(recommendation_result, submission_path)
def genTrainTriples(self):
    """Build training tuples with randomly sampled nearby POIs.

    Each row of ``self.trdata_path`` supplies a user id (column 0) and two
    POI ids (columns 1 and 4, called pid1 and pid2 here).  ``self.nsample``
    POIs other than pid2 are drawn at random from the grid cells that
    ``getNearGridsForPOI`` returns for pid2, and one tuple is emitted per
    drawn POI.

    Returns:
        list: ``[user_index, category_index_of_pid1, pid1_index, pid2_index,
        sampled_poi_index]`` lists, with indices looked up in
        ``self.user_ids``, ``self.category_ids`` and ``self.poi_ids``.
    """
    tr_triple = []
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])

    # ``with`` guarantees the training file is closed, even on a bad row.
    with open(self.trdata_path) as trdata_file:
        for idx, entry in enumerate(csv.reader(trdata_file), 1):
            sys.stdout.write("\rFINISHED TRIPLE NUM: %d. " % idx)
            sys.stdout.flush()
            uid, pid1, pid2 = int(entry[0]), int(entry[1]), int(entry[4])
            near_grids = getNearGridsForPOI(
                self.pois_latlng[pid2], ndimx, ndimy, True)
            candidate_pois = []
            for grididx in near_grids:
                candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

            negatives = set(candidate_pois) - set([pid2])
            # The original length guard is kept; the second condition
            # prevents random.sample from raising ValueError when the
            # de-duplicated pool is smaller than nsample.
            if (self.nsample < len(candidate_pois)
                    and self.nsample <= len(negatives)):
                # random.sample needs a sequence on recent Python 3; list()
                # keeps the set's iteration order.
                for pid in random.sample(list(negatives), self.nsample):
                    tr_triple.append([
                        self.user_ids[uid],
                        self.category_ids[self.poi_category[pid1]],
                        self.poi_ids[pid1],
                        self.poi_ids[pid2],
                        self.poi_ids[pid],
                    ])

    print(len(tr_triple))
    return tr_triple
def recommendation(self, submission_path):
    """Rank candidate POIs for every test row by per-user popularity.

    For every row of the test CSV (``self.tedata_path``; user id in column
    0, query POI id in column 1), each POI in the grid cells returned by
    ``getNearGridsForPOI`` is scored with ``self.per_pois_pop[uid][poi]``,
    or 0 when the user has no entry for that POI.  The top
    ``settings["MAX_TOPK"]`` POI ids per row are stored under the row index
    and the mapping is handed to ``write_submission``.

    Args:
        submission_path: forwarded unchanged to ``write_submission``.
    """
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}

    # ``with`` guarantees the test file is closed, even on a bad row.
    with open(self.tedata_path) as tedata_file:
        for i, entry in enumerate(csv.reader(tedata_file)):
            uid, pid1 = int(entry[0]), int(entry[1])
            near_grids = getNearGridsForPOI(
                self.pois_latlng[pid1], ndimx, ndimy, True)

            pois_score = []
            for grididx in near_grids:
                for candidate_poi in self.grids_pois[grididx[0]][grididx[1]]:
                    # Looked up per candidate (not hoisted) so a user
                    # without an entry only matters when candidates exist.
                    user_pop = self.per_pois_pop[uid]
                    if candidate_poi in user_pop:
                        score = user_pop[candidate_poi]
                    else:
                        score = 0
                    pois_score.append([candidate_poi, score])

            result = sorted(pois_score, key=lambda x: x[1],
                            reverse=True)[:settings["MAX_TOPK"]]
            recommendation_result[i] = [pair[0] for pair in result]
            # Per-row progress; parenthesised form prints the same on
            # Python 2 and 3.
            print(i)

    write_submission(recommendation_result, submission_path)
def recommendation(self, submission_path):
    """Rank candidate POIs for every test row by user/POI factor score.

    For every row of the test CSV (``self.tedata_path``; user id in column
    0, query POI id in column 1), the POIs in the grid cells returned by
    ``getNearGridsForPOI`` are scored as

        dot(user_factor, poi_factor) [+ poi_bias]

    with the bias added only when ``self.bias_tag == True``.  Scores are
    memoised per (user, POI) so later rows for the same user reuse them.
    The top ``settings["MAX_TOPK"]`` POI ids per row are stored under the
    row index and the mapping is handed to ``write_submission``.

    Args:
        submission_path: forwarded unchanged to ``write_submission``.
    """
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}
    cache_user_poi_score = defaultdict(dict)

    # ``with`` guarantees the test file is closed, even on a bad row.
    with open(self.tedata_path) as tedata_file:
        for i, entry in enumerate(csv.reader(tedata_file)):
            uid, pid1 = int(entry[0]), int(entry[1])
            near_grids = getNearGridsForPOI(
                self.pois_latlng[pid1], ndimx, ndimy, True)
            candidate_pois = []
            for grididx in near_grids:
                candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

            user_scores = cache_user_poi_score[uid]
            already_ranked = set()
            pois_score = []
            for c_pid in candidate_pois:
                # A POI listed more than once is ranked only once per row.
                if c_pid in already_ranked:
                    continue
                already_ranked.add(c_pid)
                if c_pid not in user_scores:
                    poi_idx = self.poi_ids[c_pid]
                    score = np.dot(self.user_factor[self.user_ids[uid]],
                                   self.poi_factor[poi_idx])
                    # Explicit comparison kept so non-bool flags behave as
                    # before.
                    if self.bias_tag == True:
                        score = score + self.poi_bias[poi_idx]
                    user_scores[c_pid] = score
                # Cached scores must enter the ranking too; previously they
                # went into a list that was overwritten before use, so every
                # cache hit silently removed the POI from the result.
                pois_score.append([c_pid, user_scores[c_pid]])

            result = sorted(pois_score, key=lambda x: x[1],
                            reverse=True)[:settings["MAX_TOPK"]]
            recommendation_result[i] = [pair[0] for pair in result]
            sys.stdout.write("\rFINISHED PAIR NUM: %d. " % (i + 1))
            sys.stdout.flush()

    write_submission(recommendation_result, submission_path)
def genTrainPairs(self):
    """Build (user, POI, sampled POI) training pairs from the training CSV.

    Each row of ``self.trdata_path`` supplies a user id (column 0) and two
    POI ids (columns 1 and 4).  For each of the two POIs, the first time it
    is processed for that user, ``self.nsample`` other POIs are drawn at
    random from the grid cells returned by ``getNearGridsForPOI`` and one
    pair is emitted per drawn POI.

    Returns:
        list: ``[user_index, poi_index, sampled_poi_index]`` lists, with the
        indices looked up in ``self.user_ids`` and ``self.poi_ids``.
    """
    tr_pair = []
    # Number of grid cells along each axis: the full coordinate extent
    # divided by the configured cell size.
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    user_visit = defaultdict(set)

    # ``with`` guarantees the training file is closed, even on a bad row.
    with open(self.trdata_path) as trdata_file:
        for idx, entry in enumerate(csv.reader(trdata_file), 1):
            sys.stdout.write("\rFINISHED PAIR NUM: %d. " % idx)
            sys.stdout.flush()
            uid, pid1, pid2 = int(entry[0]), int(entry[1]), int(entry[4])
            visited = user_visit[uid]

            # Both POIs of the row get the same treatment, pid1 first.
            for anchor in (pid1, pid2):
                if anchor in visited:
                    continue
                near_grids = getNearGridsForPOI(
                    self.pois_latlng[anchor], ndimx, ndimy, True)
                candidate_pois = []
                for grididx in near_grids:
                    candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

                negatives = set(candidate_pois) - set([anchor])
                # The original length guard is kept; the second condition
                # prevents random.sample from raising ValueError when the
                # de-duplicated pool is smaller than nsample.
                if (self.nsample < len(candidate_pois)
                        and self.nsample <= len(negatives)):
                    # random.sample needs a sequence on recent Python 3;
                    # list() keeps the set's iteration order.
                    for pid in random.sample(list(negatives), self.nsample):
                        tr_pair.append([
                            self.user_ids[uid],
                            self.poi_ids[anchor],
                            self.poi_ids[pid],
                        ])
                    # Only mark the POI as handled once pairs were emitted.
                    visited.add(anchor)

    print(len(tr_pair))
    return tr_pair
def recommendation(self, submission_path):
    """Rank candidate POIs for every test row by ``self.pois_popularity``.

    For every row of the test CSV (``self.tedata_path``; column 0 is parsed
    as the user id, column 1 as the query POI id), each POI in the grid
    cells returned by ``getNearGridsForPOI`` is scored with
    ``self.pois_popularity[poi]``.  The user id does not influence the
    ranking.  The top ``settings["MAX_TOPK"]`` POI ids per row are stored
    under the row index and the mapping is handed to ``write_submission``.

    Args:
        submission_path: forwarded unchanged to ``write_submission``.
    """
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}

    # ``with`` guarantees the test file is closed, even on a bad row.
    with open(self.tedata_path) as tedata_file:
        for i, entry in enumerate(csv.reader(tedata_file)):
            # uid is still parsed so a malformed first column fails loudly,
            # exactly as before.
            uid, pid1 = int(entry[0]), int(entry[1])
            near_grids = getNearGridsForPOI(
                self.pois_latlng[pid1], ndimx, ndimy, True)
            candidate_pois = []
            for grididx in near_grids:
                candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

            pois_score = [[poi, self.pois_popularity[poi]]
                          for poi in candidate_pois]
            result = sorted(pois_score, key=lambda x: x[1],
                            reverse=True)[:settings["MAX_TOPK"]]
            recommendation_result[i] = [pair[0] for pair in result]
            # Per-row progress; parenthesised form prints the same on
            # Python 2 and 3.
            print(i)

    write_submission(recommendation_result, submission_path)
def genTrainTriples(self):
    """Build training tuples with randomly sampled nearby POIs.

    Each row of ``self.trdata_path`` supplies a user id (column 0) and two
    POI ids (columns 1 and 4, called pid1 and pid2 here).  ``self.nsample``
    POIs other than pid2 are drawn at random from the grid cells that
    ``getNearGridsForPOI`` returns for pid2, and one tuple is emitted per
    drawn POI.

    Returns:
        list: ``[user_index, category_index_of_pid1, pid2_index,
        sampled_poi_index]`` lists, with indices looked up in
        ``self.user_ids``, ``self.category_ids`` and ``self.poi_ids``.
    """
    tr_triple = []
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])

    # ``with`` guarantees the training file is closed, even on a bad row.
    with open(self.trdata_path) as trdata_file:
        for idx, entry in enumerate(csv.reader(trdata_file), 1):
            sys.stdout.write("\rFINISHED TRIPLE NUM: %d. " % idx)
            sys.stdout.flush()
            uid, pid1, pid2 = int(entry[0]), int(entry[1]), int(entry[4])
            near_grids = getNearGridsForPOI(
                self.pois_latlng[pid2], ndimx, ndimy, True)
            candidate_pois = []
            for grididx in near_grids:
                candidate_pois += self.grids_pois[grididx[0]][grididx[1]]

            negatives = set(candidate_pois) - set([pid2])
            # The original length guard is kept; the second condition
            # prevents random.sample from raising ValueError when the
            # de-duplicated pool is smaller than nsample.
            if (self.nsample < len(candidate_pois)
                    and self.nsample <= len(negatives)):
                # random.sample needs a sequence on recent Python 3; list()
                # keeps the set's iteration order.
                for pid in random.sample(list(negatives), self.nsample):
                    tr_triple.append([
                        self.user_ids[uid],
                        self.category_ids[self.poi_category[pid1]],
                        self.poi_ids[pid2],
                        self.poi_ids[pid],
                    ])

    print(len(tr_triple))
    return tr_triple