def main_sslim():
    """Load the interaction matrices and precomputed similarities, then fit
    the SLIM recommender on the full training data.

    NOTE(review): relies on module-level `sps`, `normalize`,
    `ReccomenderSslim`, `validate`, `util` — presumably imported at file
    top, outside this view.
    """
    # Full and held-out user-rating matrices.
    URM = sps.csr_matrix(sps.load_npz("../../Dataset/URM/data_all.npz"))
    URM_test = sps.csr_matrix(sps.load_npz("../../Dataset/URM/data_test.npz"))

    # Precomputed item-similarity matrices from earlier models.
    sim_cb = sps.csr_matrix(sps.load_npz("../../Dataset/old/similarities/CB-Sim.npz"))
    sim_collab = sps.csr_matrix(sps.load_npz("../../Dataset/old/similarities/Col-Sim.npz"))
    sim_slim = sps.csr_matrix(sps.load_npz("../../Dataset/old/similarities/Slim-Sim.npz"))

    # Rescale the SLIM similarity into the value range spanned by the
    # other two similarity matrices.
    lower = min(min(sim_cb.data), min(sim_collab.data))
    upper = max(max(sim_cb.data), max(sim_collab.data))
    sim_slim_scaled = normalize(sim_slim, lower, upper)

    recommender = ReccomenderSslim(URM)
    evaluator = validate(URM_test, [10])
    target_users = util.get_target_users("../../Dataset/target_users.csv", seek=8)
    # Disabled hybrid-similarity experiment:
    # similarity_matrix = mauri.similarityMatrixTopK(
    #     0.31 * sim_cb + 1.82 * sim_collab + 0.76 * sim_slim_scaled, k=25)
    recommender.fit(train="-train")
def create_clusters():
    """Partition users 0..30910 into demographic clusters keyed by (age, region).

    A user with both attributes goes into the single cluster
    ``int(str(age) + str(region))``.  A user missing one attribute is added
    to every cluster matching the attribute that IS known.  Users with no
    demographic data land in cluster key 0, cold users in key 10001, and
    clusters smaller than 600 users are merged into the catch-all key 10000.

    Returns:
        list[list[int]]: one list of user ids per surviving cluster.

    Fixes vs. original:
    - no longer removes elements from ``user_list`` while iterating it
      (that skipped every element following a removed one);
    - attribute lookup via dicts instead of O(n) ``list.index`` per user;
    - the merge loop can no longer pop / self-append the catch-all
      bucket 10000 (which caused an infinite loop or a later KeyError).
    """
    cold = utils.get_target_users("../../Dataset/target_users_cold.csv", seek=8)
    cold_set = set(cold)  # O(1) membership tests

    # create_tuples returns (interactions, user_ids, attribute_values);
    # the interaction component is unused here.
    _, user_age, age = utils.create_tuples(
        "../../Dataset/UCM/UCM_age.csv", 13)
    _, user_region, region = utils.create_tuples(
        "../../Dataset/UCM/UCM_region.csv", 13)

    # First occurrence wins, matching the original list.index() semantics.
    age_of = {}
    for u, a in zip(user_age, age):
        age_of.setdefault(u, a)
    region_of = {}
    for u, r in zip(user_region, region):
        region_of.setdefault(u, r)

    users_dict = dict()
    unassigned = []  # users with no demographic information at all
    for u in range(0, 30911):
        if u in cold_set:
            continue  # cold users form their own cluster (key 10001) below
        target_age = age_of.get(u, -1)
        target_region = region_of.get(u, -1)

        if target_age >= 0 and target_region >= 0:
            key = int(str(target_age) + str(target_region))
            users_dict.setdefault(key, list()).append(u)
        elif target_age < 0 and target_region >= 0:
            # Unknown age: add to every age bucket of the known region.
            # NOTE(review): range(0, max(age)) excludes max(age) itself,
            # as in the original — confirm that is intended.
            for i in range(0, max(age)):
                key = int(str(i) + str(target_region))
                users_dict.setdefault(key, list()).append(u)
        elif target_age >= 0 and target_region < 0:
            # Unknown region: add to every region bucket of the known age.
            for i in range(min(region), max(region)):
                key = int(str(target_age) + str(i))
                users_dict.setdefault(key, list()).append(u)
        else:
            unassigned.append(u)

    users_dict[0] = unassigned
    users_dict[10000] = list()  # catch-all for undersized clusters
    users_dict[10001] = cold
    print(users_dict)

    # Merge every cluster smaller than 600 users into the catch-all bucket.
    for key in list(users_dict.keys()):
        if key != 10000 and len(users_dict[key]) < 600:
            users_dict[10000].extend(users_dict.pop(key))

    return [members for members in users_dict.values()]
import scipy.sparse as sps
import utils_new as util
import numpy as np

# Script: identify "lukewarm" target users — users with 1 or 2 interactions
# in the URM — and write them to ../target_users_lukewarm.csv.

# Uncomment to generate a new dataset
# util.createDataset(".")

URM = sps.load_npz("data_all.npz").tocsr()

targetUsers = util.get_target_users("../target_users.csv", seek=8)
print(len(targetUsers))
print(max(targetUsers))

# Per-user profile length = number of stored interactions per CSR row.
profile_lengths = np.ediff1d(URM.indptr)
cold_user_mask = profile_lengths == 0   # no interactions at all
user_mask = profile_lengths < 3         # fewer than 3 interactions
# print(len(user_mask[user_mask == True]))
# exit()

# Set-based membership: the original did `i in targetUsers` (a list scan)
# for every one of ~30k users, which is O(n^2) overall.
target_set = set(targetUsers)
cold_users = []
for i in np.where(user_mask & ~cold_user_mask)[0]:
    if i in target_set:
        cold_users.append(int(i))
        targetUsers.remove(i)

with open("../target_users_lukewarm.csv", 'w') as f:
    f.write("user_id\n")
    for user_id in cold_users:
        f.write(str(user_id) + "\n")

# Disabled alternative output, previously left as an unterminated ''''' string
# literal (dead code that would not even parse); kept here as comments:
# with open("target_users_other.csv", 'w') as f:
#     f.write("user_id\n")
#     for user_id in targetUsers:
#         f.write(str(user_id) + "\n")