def divide_dataset(self):
    """
    Shuffle the interactions and split them into ``self.n_folds`` folds.

    The data returned by ``ReadFile(...).return_information()`` is stored in
    ``self.tp``. Its ``'list_feedback'`` list is shuffled in place and cut
    into ``n_folds`` consecutive, non-overlapping partitions of
    ``int(ni / n_folds)`` interactions each. For every fold ``f``,
    ``self.dict_folds[f]`` receives ``{'train': ..., 'test': ...}`` where
    ``test`` is partition ``f`` and ``train`` is the sorted concatenation of
    all the other partitions.

    Interactions beyond ``n_folds * partition_size`` (the division
    remainder) are not assigned to any fold.

    When ``self.dir_folds`` is not None the folds are written through
    ``WriteFile(...).cross_fold_validation()``.
    """
    self.tp = ReadFile(self.dataset, space_type=self.space_type).return_information()
    feedback = self.tp['list_feedback']
    random.shuffle(feedback)

    # Number of interactions that each partition should have.
    partition_size = int(float(self.tp['ni']) / float(self.n_folds))

    # Slice ends are exclusive, so fold p must start exactly where fold
    # p - 1 stopped; starting one position later would drop an interaction
    # at every fold boundary.
    list_folds = [
        feedback[p * partition_size:(p + 1) * partition_size]
        for p in range(self.n_folds)
    ]

    for fold, test_set in enumerate(list_folds):
        train_set = []
        for other, partition in enumerate(list_folds):
            if other != fold:
                train_set += partition
        train_set.sort()
        test_set.sort()
        self.dict_folds[fold] = {'train': train_set, 'test': test_set}

    if self.dir_folds is not None:
        WriteFile(self.dir_folds, self.dict_folds, self.space_type).cross_fold_validation()
def predict(self):
    """
    Rank, for every user row of the training matrix, the items whose
    matrix entry is 0.

    The score of a candidate item is the sum of the similarities between
    the user and those of the user's ``self.k`` most similar users that
    appear in ``self.train_set['dir'][item]`` (presumably the users that
    interacted with the item -- confirm against the reader). The best
    ``self.rank_number`` items per user are appended to ``self.ranking`` as
    ``(mapped_user, mapped_item, score)`` tuples.

    When ``self.ranking_file`` is not None the full ranking is written with
    ``WriteFile(...).write_recommendation()``.
    """
    matrix = self.train_set['matrix']
    map_user = self.train_set['map_user']
    map_item = self.train_set['map_item']

    for user in range(len(matrix)):
        # NaN similarities count as 0, both for picking neighbours and for
        # scoring; leaving NaN in the sort key makes the order undefined.
        sims = np.asarray(self.su_matrix[user], dtype=float)
        sims = np.where(np.isnan(sims), 0.0, sims)

        # Exclude the user explicitly instead of assuming it sorts first,
        # and keep exactly k neighbours (k = 1 must still yield one).
        candidates = [other for other in range(len(sims)) if other != user]
        candidates.sort(key=lambda other: -sims[other])
        neighbors = set(candidates[:self.k])

        partial_ranking = []
        for item in np.flatnonzero(matrix[user] == 0):
            common = neighbors.intersection(self.train_set['dir'][item])
            sim_suv = sum(sims[neighbor] for neighbor in common)
            partial_ranking.append((map_user[user], map_item[item], sim_suv))

        partial_ranking.sort(key=lambda entry: -entry[2])
        self.ranking += partial_ranking[:self.rank_number]

    if self.ranking_file is not None:
        WriteFile(self.ranking_file, self.ranking).write_recommendation()
def predict(self):
    """
    Give every not-yet-seen item of every user a random score.

    For each distinct user in ``self.users`` the items listed under
    ``self.train_set['not_seen'][user]`` (none when the user is missing)
    get a score drawn with ``random.uniform(0, 1)``. The
    ``self.rank_number`` highest-scored triples ``(user, item, score)`` are
    appended to ``self.ranking``.

    When ``self.ranking_file`` is not None the ranking is written with
    ``WriteFile(...).write_recommendation()``.
    """
    for user in set(self.users):
        scored = [
            (user, item, random.uniform(0, 1))
            for item in self.train_set['not_seen'].get(user, [])
        ]
        scored.sort(key=lambda entry: entry[2], reverse=True)
        self.ranking += scored[:self.rank_number]

    if self.ranking_file is not None:
        writer = WriteFile(self.ranking_file, self.ranking)
        writer.write_recommendation()
def predict(self):
    """
    Score every not-yet-seen item of every user by a ratio of list sizes.

    The score of ``(user, item)`` is
    ``len(train_set['di'][item]) / len(train_set['du'][user])`` rounded to
    four decimals. For each distinct user in ``self.users`` the
    ``self.rank_number`` best triples ``(user, item, score)`` are appended
    to ``self.ranking``.

    When ``self.ranking_file`` is not None the ranking is written with
    ``WriteFile(...).write_recommendation()``.
    """
    for user in set(self.users):
        scored = [
            (
                user,
                item,
                round(
                    len(self.train_set['di'][item])
                    / float(len(self.train_set['du'][user])),
                    4,
                ),
            )
            for item in self.train_set['not_seen'].get(user, [])
        ]
        scored.sort(key=lambda entry: entry[2], reverse=True)
        self.ranking += scored[:self.rank_number]

    if self.ranking_file is not None:
        writer = WriteFile(self.ranking_file, self.ranking)
        writer.write_recommendation()
def predict(self):
    """
    Score the candidate items stored in ``self.rankings_info``.

    For each user key, every item under ``rankings_info[user]["i"]`` is
    scored with ``self._predict_score(user, item)``. The
    ``self.rank_number`` best triples
    ``(map_user[user], map_item[item], score)`` are appended to
    ``self.ranking``.

    When ``self.ranking_file`` is not None the ranking is written with
    ``WriteFile(...).write_recommendation()``.
    """
    for user in self.rankings_info:
        scored = [
            (self.map_user[user], self.map_item[item], self._predict_score(user, item))
            for item in self.rankings_info[user]["i"]
        ]
        scored.sort(key=lambda entry: entry[2], reverse=True)
        self.ranking += scored[:self.rank_number]

    if self.ranking_file is not None:
        writer = WriteFile(self.ranking_file, self.ranking)
        writer.write_recommendation()
def predict(self):
    """
    Predict a score for every (user, item) pair of the test set.

    Each pair is translated through ``self.map_users`` / ``self.map_items``
    and scored with ``self._predict(u, i, True)``; the triples
    ``(user, item, score)`` are appended to ``self.predictions``.

    When ``self.prediction_file`` is not None the predictions are sorted by
    user and written with ``WriteFile(...).write_recommendation()``.

    Returns ``self.predictions``, or None when ``self.test_set`` is None.
    """
    if self.test_set is None:
        return None

    for user in self.test_set['users']:
        for item in self.test_set['feedback'][user]:
            user_idx = self.map_users[user]
            item_idx = self.map_items[item]
            score = self._predict(user_idx, item_idx, True)
            self.predictions.append((user, item, score))

    if self.prediction_file is not None:
        self.predictions = sorted(self.predictions, key=lambda entry: entry[0])
        writer = WriteFile(self.prediction_file, self.predictions)
        writer.write_recommendation()

    return self.predictions
def divide_dataset(self):
    """
    Build ``self.n_folds`` random train/test splits over several datasets.

    ``ReadFile(...).split_dataset()`` is expected to fill
    ``tp.triple_dataset`` and ``tp.individual_interaction``. For every fold
    the de-duplicated triples are shuffled and cut at
    ``int((1 - test_ratio) * len(triples))`` into a train part and a test
    part. Then, for each entry ``i`` of ``self.dataset``, the value stored at
    ``tp.individual_interaction[i][user][item]`` is collected for each pair;
    pairs whose lookup raises KeyError are skipped.

    Results per fold:
      * ``self.dict_feedback_folds[fold]``: one ``{user: {item: value}}``
        mapping per dataset, built from the train part only.
      * ``self.dict_folds[fold]``: ``{'train': [...], 'test': [...]}`` with
        ``[user, item, value]`` rows accumulated over all datasets.

    When ``self.dir_folds`` is not None everything is written through
    ``WriteFile(...).split_dataset(...)``.
    """
    tp = ReadFile(self.dataset, space_type=self.space_type)
    tp.split_dataset()

    for fold in range(self.n_folds):
        tp.triple_dataset = list(set(tp.triple_dataset))
        random.shuffle(tp.triple_dataset)

        split_point = int((1 - self.test_ratio) * len(tp.triple_dataset))
        train_pairs = sorted(tp.triple_dataset[:split_point])
        test_pairs = sorted(tp.triple_dataset[split_point:], key=lambda pair: pair[0])

        per_dataset_feedback = []
        train_rows = []
        test_rows = []

        for i, _ in enumerate(self.dataset):
            feedback_by_user = {}

            for pair in train_pairs:
                user, item = pair[0], pair[1]
                # The user's entry is created before the lookup, so a user
                # with no interaction in dataset i keeps an empty mapping.
                user_feedback = feedback_by_user.setdefault(user, {})
                try:
                    value = tp.individual_interaction[i][user][item]
                except KeyError:
                    continue
                user_feedback[item] = value
                train_rows.append([user, item, value])

            for pair in test_pairs:
                user, item = pair[0], pair[1]
                try:
                    value = tp.individual_interaction[i][user][item]
                except KeyError:
                    continue
                test_rows.append([user, item, value])

            per_dataset_feedback.append(feedback_by_user)

        self.dict_feedback_folds[fold] = per_dataset_feedback
        self.dict_folds[fold] = {'train': train_rows, 'test': test_rows}

    if self.dir_folds is not None:
        writer = WriteFile(self.dir_folds, self.dict_folds, self.space_type)
        writer.split_dataset(self.dict_feedback_folds, self.dataset)
def predict(self):
    """
    Predict a rating for every (user, item) pair of the test data.

    For each pair, the users listed in ``self.train['di'][item]`` are ranked
    by their similarity to the target user (NaN counts as 0) and the
    ``self.k`` most similar are kept. The prediction is::

        bui[user][item] + sum((r_vj - bui[v][item]) * sim_v) / sum(sim_v)

    and falls back to ``bui[user][item]`` when the similarity sum is zero
    or a KeyError occurs while collecting neighbours. The result is clamped
    to ``[train_set['min'], train_set['max']]`` and appended to
    ``self.predictions`` as ``(user, item, rating)``.

    When ``self.prediction_file`` is not None the predictions are written
    with ``WriteFile(...).write_recommendation()``.

    Returns ``self.predictions``, or None when ``self.test`` is None.

    NOTE(review): this method reads both ``self.train`` and
    ``self.train_set`` -- confirm that both attributes are meant to exist.
    """
    if self.test is None:
        return None

    for user in self.test['users']:
        for item in self.test['feedback'][user]:
            try:
                neighbors = []
                for user_j in self.train['di'][item]:
                    sim = self.su_matrix[self.map_users[user]][self.map_users[user_j]]
                    neighbors.append((user_j, 0.0 if np.isnan(sim) else sim))
                neighbors.sort(key=lambda pair: -pair[1])

                weighted_dev = 0.0
                sum_sim = 0.0
                for user_j, sim in neighbors[:self.k]:
                    try:
                        deviation = (self.train_set['feedback'][user_j][item]
                                     - self.bui[user_j][item])
                    except KeyError:
                        # Neighbour without a rating or baseline: skip it.
                        continue
                    weighted_dev += deviation * sim
                    sum_sim += sim

                # Test for zero explicitly: dividing NumPy scalars by zero
                # produces nan/inf instead of raising ZeroDivisionError.
                if sum_sim == 0:
                    ruj = self.bui[user][item]
                else:
                    ruj = self.bui[user][item] + weighted_dev / sum_sim
            except KeyError:
                ruj = self.bui[user][item]

            # Clamp the rating to the highest and lowest training values.
            if ruj > self.train_set["max"]:
                ruj = self.train_set["max"]
            if ruj < self.train_set["min"]:
                ruj = self.train_set["min"]
            self.predictions.append((user, item, ruj))

    if self.prediction_file is not None:
        WriteFile(self.prediction_file, self.predictions).write_recommendation()

    return self.predictions
def predict(self):
    """
    Rank the unseen items of every user from the factor model.

    The score matrix is ``self.bias.T + np.dot(self.p, self.q.T)``; row
    ``u`` belongs to ``self.users[u]`` and column ``i`` to
    ``self.map_items_index[i]``. For each user the items are visited from
    highest to lowest score, those already present in
    ``self.train_set["feedback"][user]`` are skipped, and the first
    ``self.rank_number`` remaining triples ``(user, item, score)`` are
    appended to ``self.ranking``.

    The candidate list is walked until ``rank_number`` unseen items are
    found rather than being cut to a fixed window, so a user whose
    best-scored items are mostly already seen still gets a full ranking.
    A ``rank_number`` of None keeps every unseen item.

    When ``self.ranking_file`` is not None the ranking is sorted by user
    and written with ``WriteFile(...).write_recommendation()``.
    """
    w = self.bias.T + np.dot(self.p, self.q.T)
    limit = self.rank_number

    for u, user in enumerate(self.users):
        # A user without training feedback has nothing to filter out.
        try:
            seen = self.train_set["feedback"][user]
        except KeyError:
            seen = ()

        scores = w[u]
        order = sorted(range(len(scores)), key=lambda k: scores[k], reverse=True)

        partial_ranking = []
        for i in order:
            if limit is not None and len(partial_ranking) >= limit:
                break
            item = self.map_items_index[i]
            if item not in seen:
                partial_ranking.append((user, item, scores[i]))

        self.ranking += partial_ranking

    if self.ranking_file is not None:
        self.ranking = sorted(self.ranking, key=lambda x: x[0])
        WriteFile(self.ranking_file, self.ranking).write_recommendation()
def predict(self):
    """
    Predict a clamped rating for every (user, item) pair of the test set.

    Each pair is scored with
    ``self._predict(map_users[user], map_items[item])`` and the score is
    limited to ``[train_set["min"], train_set["max"]]``. If a KeyError is
    raised while doing so, ``train_set["mean_rates"]`` is used instead. The
    triples ``(user, item, rating)`` are appended to ``self.predictions``.

    When ``self.prediction_file`` is not None the predictions are written
    with ``WriteFile(...).write_recommendation()``.

    Returns ``self.predictions``, or None when ``self.test_set`` is None.
    """
    if self.test_set is None:
        return None

    for user in self.test_set['users']:
        for item in self.test_set['feedback'][user]:
            try:
                rating = self._predict(self.map_users[user], self.map_items[item])
                # Cap at the maximum first, then raise to the minimum.
                rating = min(rating, self.train_set["max"])
                rating = max(rating, self.train_set["min"])
            except KeyError:
                rating = self.train_set["mean_rates"]
            self.predictions.append((user, item, rating))

    if self.prediction_file is not None:
        writer = WriteFile(self.prediction_file, self.predictions)
        writer.write_recommendation()

    return self.predictions
def predict(self):
    """
    Predict a score for every (user, item) pair of the test set.

    For each user the rows ``self.y[self.dict_index[user]]`` are summed; if
    the user is missing from ``self.dict_index`` a vector of ones of length
    ``self.factors`` is used instead. Each item of the user is then scored
    with ``self._predict_svd_plus_plus(u, i, summed_vector)`` and the triple
    ``(user, item, score)`` is appended to ``self.predictions``.

    When ``self.prediction_file`` is not None the predictions are sorted by
    user and written with ``WriteFile(...).write_recommendation()``.

    Returns ``self.predictions``, or None when ``self.test_set`` is None.
    """
    if self.test_set is None:
        return None

    for user in self.test_set['users']:
        # Sum of the y rows selected for this user.
        try:
            implicit_sum = sum(self.y[self.dict_index[user]])
        except KeyError:
            implicit_sum = np.ones(self.factors, np.double)

        for item in self.test_set['feedback'][user]:
            score = self._predict_svd_plus_plus(
                self.map_users[user], self.map_items[item], implicit_sum)
            self.predictions.append((user, item, score))

    if self.prediction_file is not None:
        self.predictions = sorted(self.predictions, key=lambda entry: entry[0])
        writer = WriteFile(self.prediction_file, self.predictions)
        writer.write_recommendation()

    return self.predictions
def predict(self):
    """
    Rank, for every user row of the training matrix, the items whose
    matrix entry is 0.

    The score of a candidate item is the sum of its ``self.k`` largest
    similarities (``self.si_matrix``) to the items with a non-zero entry in
    the user's row; NaN similarities count as 0. The best
    ``self.rank_number`` triples ``(mapped_user, mapped_item, score)`` per
    user are appended to ``self.ranking``.

    When ``self.ranking_file`` is not None the ranking is written with
    ``WriteFile(...).write_recommendation()``.
    """
    matrix = self.train_set['matrix']
    map_user = self.train_set["map_user"]
    map_item = self.train_set["map_item"]

    for user in range(len(matrix)):
        # The user's non-zero items do not depend on the candidate item, so
        # compute them once per user.
        seen_items = np.nonzero(matrix[user])[0]

        partial_ranking = []
        for item in np.flatnonzero(matrix[user] == 0):
            row = self.si_matrix[item]
            sims = [0 if np.isnan(row[j]) else row[j] for j in seen_items]
            sims.sort(reverse=True)
            partial_ranking.append(
                (map_user[user], map_item[item], sum(sims[:self.k])))

        partial_ranking.sort(key=lambda entry: -entry[2])
        self.ranking += partial_ranking[:self.rank_number]

    if self.ranking_file is not None:
        WriteFile(self.ranking_file, self.ranking).write_recommendation()
def write_ranking(self):
    """
    Sort ``self.final_ranking`` by the first field of each entry and write
    it with ``WriteFile(...).write_recommendation()``.

    The sorted list replaces ``self.final_ranking``.
    """
    self.final_ranking = sorted(self.final_ranking, key=lambda entry: entry[0])
    WriteFile(self.file_write, self.final_ranking, self.space_type).write_recommendation()
def write_results(self):
    """
    Write ``self.final_ranking`` to ``self.write_file`` through
    ``WriteFile(...).write_ensemble``, passing ``self.list_users``.
    """
    writer = WriteFile(self.write_file, self.final_ranking)
    writer.write_ensemble(self.list_users)