def __init__(self, URM_train, ICM, target_model, training=True):
    """CFW boosting recommender: learns feature weights so that the ICM
    reproduces the similarity of a pre-trained target model.

    :param URM_train: user-rating matrix (users x items), converted to CSR
    :param ICM: item-content matrix (items x features), converted to CSR
    :param target_model: recommender CLASS (not an instance); its saved
        checkpoint is loaded and its W_sparse becomes the target similarity
    :param training: forwarded to OfflineDataLoader.get_model to select the
        training vs. submission checkpoint
    :raises ValueError: if URM item count and ICM row count disagree
    """
    super(CFWBoostingRecommender, self).__init__()
    # Consistency check: URM columns (items) must match ICM rows.
    if (URM_train.shape[1] != ICM.shape[0]):
        raise ValueError(
            "Number of items not consistent. URM contains {} but ICM contains {}"
            .format(URM_train.shape[1], ICM.shape[0]))
    # if(S_matrix_target.shape[0] != S_matrix_target.shape[1]):
    #     raise ValueError("Items imilarity matrix is not square: rows are {}, columns are {}".format(S_matrix_target.shape[0], S_matrix_target.shape[1]))
    # if(S_matrix_target.shape[0] != ICM.shape[0]):
    #     raise ValueError("Number of items not consistent. S_matrix contains {} but ICM contains {}".format(S_matrix_target.shape[0], ICM.shape[0]))
    self.URM_train = check_matrix(URM_train, 'csr')
    self.ICM = check_matrix(ICM, 'csr')
    # Instantiate the target model and restore its saved state from disk;
    # its learned item-item similarity is the regression target.
    m = OfflineDataLoader()
    fold, file = m.get_model(target_model.RECOMMENDER_NAME, training=training)
    m1 = target_model(self.URM_train)
    print(m1.RECOMMENDER_NAME)
    m1.loadModel(folder_path=fold, file_name=file)
    self.S_matrix_target = check_matrix(m1.W_sparse, 'csr')
    self.n_items = self.URM_train.shape[1]
    self.n_users = self.URM_train.shape[0]
    self.n_features = self.ICM.shape[1]
    # Always keep the learned similarity in sparse form.
    self.sparse_weights = True
def fit(self, topK=600, shrink=1000, similarity='asymmetric', normalize=True, feature_weighting="BM25", save_model=False, best_parameters=False, **similarity_args):
    """Compute the content-based item-item similarity from the ICM.

    :param topK: neighbours kept per item
    :param shrink: shrinkage term for the similarity
    :param similarity: similarity kind forwarded to Compute_Similarity
    :param normalize: whether Compute_Similarity normalizes
    :param feature_weighting: only self.feature_weighting (restored by
        loadModel) is consulted, and only on the best_parameters path —
        the argument itself is currently unused; TODO confirm intended
    :param save_model: persist the fitted model for submission
    :param best_parameters: load previously tuned parameters from disk first
    :param similarity_args: extra kwargs forwarded to Compute_Similarity
    """
    # Fix: the original unconditionally replaced **similarity_args with a
    # hard-coded dict, silently discarding any caller-supplied options.
    # Keep the tuned default only when the caller passed nothing, so the
    # no-argument behavior is unchanged.
    if not similarity_args:
        similarity_args = {'asymmetric_alpha': 0.40273209903969387}
    if best_parameters:
        # Restore tuned attributes (feature_weighting, topK, shrink, ...).
        m = OfflineDataLoader()
        folder_path_icbf, file_name_icbf = m.get_parameter(self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path_icbf, file_name=file_name_icbf)
        # Apply the loaded feature weighting to the ICM.  A single elif
        # chain replaces the original `if "none": pass` / `if "BM25"`
        # sequence; behavior is identical ("none" falls through).
        if self.feature_weighting == "BM25":
            self.ICM = self.ICM.astype(np.float32)
            self.ICM = to_okapi(self.ICM)
        elif self.feature_weighting == "TF-IDF":
            self.ICM = self.ICM.astype(np.float32)
            self.ICM = to_tfidf(self.ICM)
        similarity = Compute_Similarity(self.ICM.T, shrink=shrink, topK=topK,
                                        normalize=normalize,
                                        similarity=similarity,
                                        **similarity_args)
    else:
        self.topK = topK
        self.shrink = shrink
        similarity = Compute_Similarity(self.ICM.T, shrink=shrink, topK=topK,
                                        normalize=normalize,
                                        similarity=similarity,
                                        **similarity_args)
    self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                      "normalize= {4}".format(self.sparse_weights, similarity, self.shrink, self.topK, normalize)
    if self.sparse_weights:
        self.W_sparse = similarity.compute_similarity()
    else:
        # Dense variant: materialize the full similarity matrix.
        self.W = similarity.compute_similarity()
        self.W = self.W.toarray()
    if save_model:
        self.saveModel("saved_models/submission/", file_name="ItemKNNCBFRecommender_submission_model")
def fit(self, show_max_performance=False, loss_tolerance=1e-6, iteration_limit=50000, damp_coeff=0.0, topK=800, add_zeros_quota=0.9744535193088417, normalize_similarity=False, save_model=True, best_parameters=False, offline=False, location="training", submission=False):
    """Learn the feature weight vector D by sparse least squares so that the
    ICM-weighted similarity approximates self.S_matrix_target (CFW fit).

    :param show_max_performance: accepted but not read in this body
    :param loss_tolerance: atol/btol passed to scipy lsqr
    :param iteration_limit: lsqr iteration cap
    :param damp_coeff: lsqr damping (ridge) coefficient
    :param topK, add_zeros_quota, normalize_similarity: fit hyper-parameters
    :param save_model: persist the fitted model under saved_models/<location>/
    :param best_parameters: restore tuned hyper-parameters before fitting
    :param offline: skip fitting entirely and load a fully trained model
    :param submission: selects the submission checkpoint when offline
    """
    if offline:
        # Load a fully trained model from disk; no fitting happens.
        m = OfflineDataLoader()
        folder_path, file_name = m.get_model(self.RECOMMENDER_NAME, training=not submission)
        self.loadModel(folder_path=folder_path, file_name=file_name)
    else:
        if best_parameters:
            # Restore tuned hyper-parameters (topK, add_zeros_quota, ...).
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.normalize_similarity = normalize_similarity
            self.add_zeros_quota = add_zeros_quota
            self.topK = topK
        # Build the training samples (row_list, col_list, data_list) and the
        # per-sample feature design matrix: element-wise product of the two
        # items' feature rows.
        self._generateTrainData_low_ram()
        commonFeatures = self.ICM[self.row_list].multiply(self.ICM[self.col_list])
        linalg_result = linalg.lsqr(commonFeatures, self.data_list, show=False, atol=loss_tolerance, btol=loss_tolerance, iter_lim=iteration_limit, damp=damp_coeff)
        # res = linalg.lsmr(commonFeatures, self.data_list, show = False, atol=loss_tolerance, btol=loss_tolerance,
        #                   maxiter = iteration_limit, damp=damp_coeff)
        # lsqr returns (x, istop, itn, r1norm, ...): x is the solution,
        # index 3 is the residual norm used here as the loss.
        self.D_incremental = linalg_result[0].copy()
        self.D_best = linalg_result[0].copy()
        self.epochs_best = 0
        self.loss = linalg_result[3]
        self._compute_W_sparse()
        if save_model:
            self.saveModel("saved_models/" + location + "/", file_name=(self.RECOMMENDER_NAME + "_" + location + "_model"))
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    clear()
    dataReader = PlaylistDataReader()
    dataReader.generate_datasets()
    URM_train = dataReader.get_URM_train()
    # URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    output_root_path = "tuned_parameters"
    # Load a pre-trained item-based CF model; its similarity matrix is
    # forwarded to the parameter search (consumed by some hybrids).
    m = OfflineDataLoader()
    fold, fil = m.get_model(ItemKNNCFRecommender.RECOMMENDER_NAME, training=True)
    m1 = ItemKNNCFRecommender(URM_train, ICM)
    m1.loadModel(folder_path=fold, file_name=fil)
    W_sparse_CF = m1.W_sparse
    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)
    # Only the uncommented entries are tuned in this run.
    collaborative_algorithm_list = [
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # Slim_mark1,
        # Slim_mark2,
        # ItemTreeRecommender_offline
        # SLIMElasticNetRecommender,
        # PartyRecommender_offline
        # PyramidRecommender_offline
        # ItemKNNCBFRecommender
        # PyramidItemTreeRecommender_offline
        # HybridEightRecommender_offline
        # ComboRecommender_offline
        SingleNeuronRecommender_offline
        # CFWBoostingRecommender
    ]
    from parameter_tuning.AbstractClassSearch import EvaluatorWrapper
    from base.evaluation.Evaluator import SequentialEvaluator
    # NOTE(review): both "validation" evaluators are built on URM_test, so
    # model selection and final evaluation use the same split — confirm this
    # is intentional (the commented-out URM_validation suggests it was not).
    evaluator_validation_earlystopping = SequentialEvaluator(URM_test, cutoff_list=[10])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])
    evaluator_validation = EvaluatorWrapper(evaluator_validation_earlystopping)
    evaluator_test = EvaluatorWrapper(evaluator_test)
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        ICM=ICM,
        W_sparse_CF=W_sparse_CF,
        metric_to_optimize="MAP",
        evaluator_validation_earlystopping=evaluator_validation_earlystopping,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        n_cases=250,
        output_root_path=output_root_path)
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            # Keep tuning the remaining recommenders even if one fails.
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
def fit(self, topK=175, shrink=400, similarity="asymmetric", normalize=True, feature_weighting="BM25", save_model=False, best_parameters=False, location="training", submission=False, offline=False, **similarity_args):
    """Compute the collaborative item-item similarity from the URM.

    :param topK, shrink, similarity, normalize: Compute_Similarity settings
    :param feature_weighting: "none", "BM25" or "TF-IDF" applied to the URM
    :param save_model: persist the fitted model
    :param best_parameters: restore tuned parameters, then fit with them
    :param offline: load a fully trained model and skip fitting
    :param submission: selects the submission checkpoint when offline
    :param similarity_args: extra kwargs for Compute_Similarity
        NOTE(review): both non-offline branches overwrite similarity_args
        with hard-coded dicts, so caller-supplied kwargs are ignored —
        confirm whether this is intentional pinning of tuned values.
    """
    if offline:
        m = OfflineDataLoader()
        folder_path_icf, file_name_icf = m.get_model(self.RECOMMENDER_NAME, training=(not submission))
        self.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)
    else:
        if best_parameters:
            # Restore tuned attributes (incl. self.feature_weighting).
            m = OfflineDataLoader()
            folder_path_ucf, file_name_ucf = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path_ucf, file_name=file_name_ucf)
            if self.feature_weighting == "none":
                similarity = Compute_Similarity(self.URM_train.T, **similarity_args)
            else:
                # NOTE(review): the astype result is immediately overwritten —
                # to_okapi/to_tfidf receive the ORIGINAL (un-cast) URM, so the
                # float32 cast has no effect; likely a bug, left as-is.
                if feature_weighting == "BM25":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_okapi(self.URM_train)
                elif feature_weighting == "TF-IDF":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_tfidf(self.URM_train)
                # Hard-coded tuned settings replace any caller kwargs.
                similarity_args = {
                    'asymmetric_alpha': 0.11483114799990246,
                    'normalize': True,
                    'shrink': 450,
                    'similarity': 'asymmetric',
                    'topK': 200
                }
                similarity = Compute_Similarity(self.URM_train_copy.T, **similarity_args)
        else:
            self.topK = topK
            self.shrink = shrink
            self.feature_weighting = feature_weighting
            # Hard-coded tuned default replaces any caller kwargs.
            similarity_args = {'asymmetric_alpha': 0.0033404951135529437}
            if self.feature_weighting == "BM25":
                self.URM_train_copy = self.URM_train.astype(np.float32)
                self.URM_train_copy = to_okapi(self.URM_train)
            elif self.feature_weighting == "TF-IDF":
                self.URM_train_copy = self.URM_train.astype(np.float32)
                self.URM_train_copy = to_tfidf(self.URM_train)
            if self.feature_weighting == "none":
                similarity = Compute_Similarity(self.URM_train.T, shrink=shrink, topK=topK, normalize=normalize, similarity=similarity, **similarity_args)
            else:
                similarity = Compute_Similarity(self.URM_train_copy.T, shrink=shrink, topK=topK, normalize=normalize, similarity=similarity, **similarity_args)
        self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                          "normalize= {4}".format(self.sparse_weights, similarity, shrink, topK, normalize)
        if self.sparse_weights:
            self.W_sparse = similarity.compute_similarity()
        else:
            self.W = similarity.compute_similarity()
            self.W = self.W.toarray()
        if save_model:
            self.saveModel("saved_models/submission/", file_name=self.RECOMMENDER_NAME + "_" + location + "_model")
def fit(self, epochs=50, URM_test=None, filterTopPop=False, minRatingsPerUser=1, batch_size=1000, validate_every_N_epochs=1, start_validation_after_N_epochs=0, lambda_i=1e-4, lambda_j=1e-4, learning_rate=0.020, topK=500, sgd_mode='adagrad', save_model = False, best_parameters=False, offline=True,submission=False):
    """Train SLIM BPR (mark1, Cython) or load a pre-trained checkpoint.

    :param epochs .. sgd_mode: standard SLIM-BPR training hyper-parameters
    :param save_model: persist the model (see NOTE below about reachability)
    :param best_parameters: restore tuned parameters before training
    :param offline: default True — just load the saved model, no training
    :param submission: selects the submission checkpoint when offline
    :returns: training result from fit_alreadyInitialized (training path)
        or self.W (offline path)
    """
    # Record the configuration for logging/inspection.
    self.parameters = "positive_threshold= {0}, sparse_weights= {1}, symmetric= {2},sgd_mode= {3}, lambda_i={4}, " \
                      "lambda_j={5}, learning_rate={6}, topK={7}, epochs= {8}".format(
        self.positive_threshold,self.sparse_weights,self.symmetric,self.sgd_mode,lambda_i,lambda_j,learning_rate,topK,epochs)
    if offline:
        m = OfflineDataLoader()
        folder, file = m.get_model(self.RECOMMENDER_NAME, training=(not submission))
        self.loadModel(folder_path=folder,file_name=file)
    else:
        self.save_model = save_model
        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()
        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
        URM_train_positive.eliminate_zeros()
        if best_parameters:
            # Restore tuned hyper-parameters, then train with them.
            m = OfflineDataLoader()
            folder_slim, file_slim = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_slim,file_name=file_slim)
            self.cythonEpoch = Slim_BPR_Cython_Epoch(
                self.URM_mask,
                sparse_weights=self.sparse_weights,
                learning_rate=learning_rate,
                batch_size=1,
                symmetric=self.symmetric)
            result = super(Slim_BPR_Recommender_Cython, self).fit_alreadyInitialized(
                epochs=epochs,
                URM_test=URM_test,
                filterTopPop=filterTopPop,
                minRatingsPerUser=minRatingsPerUser,
                batch_size=batch_size,
                validate_every_N_epochs=validate_every_N_epochs,
                start_validation_after_N_epochs=start_validation_after_N_epochs)
        else:
            self.sgd_mode = sgd_mode
            self.cythonEpoch = Slim_BPR_Cython_Epoch(
                self.URM_mask,
                sparse_weights=self.sparse_weights,
                topK=topK,
                learning_rate=learning_rate,
                li_reg=lambda_i,
                lj_reg=lambda_j,
                batch_size=1,
                symmetric=self.symmetric,
                sgd_mode=sgd_mode)
            result = super(Slim_BPR_Recommender_Cython, self).fit_alreadyInitialized(
                epochs=epochs,
                URM_test=URM_test,
                filterTopPop=filterTopPop,
                minRatingsPerUser=minRatingsPerUser,
                batch_size=batch_size,
                validate_every_N_epochs=validate_every_N_epochs,
                start_validation_after_N_epochs=start_validation_after_N_epochs,
                lambda_i=lambda_i,
                lambda_j=lambda_j,
                learning_rate=learning_rate,
                topK=topK)
        # NOTE(review): returning here means the `if save_model` block below
        # never runs on the training path (self.save_model is stored, so the
        # base class may handle saving) — confirm the dead code is intended.
        return result
    if save_model:
        self.saveModel("saved_models/submission/",file_name="SLIM_BPR_Recommender_mark1_submission_model")
    return self.W
def printOutMapValues(modelList, URM, ICM, modelsSoFar):
    """Collect the stored MAP score of every saved model checkpoint.

    :param modelList: iterable of (recommender_name, path, tag) triples,
        where path is "folder/.../file"
    :param URM: user-rating matrix used to instantiate the recommenders
    :param ICM: item-content matrix (only for content-based recommenders)
    :param modelsSoFar: recommender names used as top-level dict keys
    :returns: {recommender_name: {tag: MAP}} for every recognised entry
    """
    map_dict = {name: dict() for name in modelsSoFar}
    m = OfflineDataLoader()
    # Dispatch table: recommender name -> factory building an empty
    # instance to load the checkpoint into.
    factories = {
        "UserKNNCFRecommender": lambda: UserKNNCFRecommender(URM),
        "ItemKNNCFRecommender": lambda: ItemKNNCFRecommender(URM),
        "ItemKNNCBFRecommender": lambda: ItemKNNCBFRecommender(URM, ICM),
        "SLIM_BPR_Recommender_mark1": lambda: Slim_mark1(URM),
        "RP3_Beta_Recommender": lambda: RP3betaRecommender(URM),
        "P3_Alpha_Recommender": lambda: P3alphaRecommender(URM),
        "PureSVD": lambda: PureSVDRecommender(URM),
        "Slim_Elastic_Net_Recommender": lambda: SLIMElasticNetRecommender(URM),
        "SLIM_BPR_Recommender_mark2": lambda: Slim_mark2(URM),
        # "ItemTreeRecommender_offline": lambda: ItemTreeRecommender_offline(URM, ICM),
        # "PartyRecommender_offline": lambda: PartyRecommender_offline(URM),
        "SingleNeuronRecommender_offline": lambda: SingleNeuronRecommender_offline(URM, ICM),
    }
    for entry in modelList:
        # Split "folder/.../file" into its directory part and file name.
        parts = entry[1].split("/")
        folder = str("/".join(parts[:-1]) + "/")
        file = parts[-1]
        make_instance = factories.get(entry[0])
        if make_instance is not None:
            loaded = make_instance()
            loaded.loadModel(folder_path=folder, file_name=file, verbose=False)
            map_dict[entry[0]][entry[2]] = loaded.MAP
    return map_dict
def fit(
        self,
        alpha=1.3167219260598073,
        beta=15.939928536132701,
        gamma=0.6048873602128846,
        delta=1.0527588765188267,
        epsilon=2.08444591782293,
        zeta=1.2588273098979674,
        eta=18.41012777389885,
        theta=18.000293943452448,
        # psi = 0.00130805010990942,
        normalize=False,
        save_model=False,
        submission=False,
        best_parameters=False,
        offline=False,
        location="submission"):
    """Weighted linear blend ("single neuron") of eight pre-trained models.

    Each Greek-letter weight scales one base model's item-item similarity;
    the user-based CF similarity (W_sparse_URM) is kept separate from the
    precomputed blend (theta is stored but not used in the blend here).

    :param offline: load the fully fitted blend from disk and stop
    :param best_parameters: restore tuned weights from the parameter file
    :param submission: stored inverted — self.submission selects the
        training checkpoints when submission is False
    """
    if offline:
        # Fully pre-trained model: restore everything and stop.
        m = OfflineDataLoader()
        folder_path, file_name = m.get_model(self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path, file_name=file_name)
    else:
        if best_parameters:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.delta = delta
            self.epsilon = epsilon
            self.zeta = zeta
            self.eta = eta
            self.theta = theta
            # self.psi = psi
        # NOTE(review): indentation reconstructed — the sub-model loading
        # below is assumed common to both branches above; confirm.
        self.normalize = normalize
        self.submission = not submission
        # Load every pre-trained base model and grab its similarity matrix.
        m = OfflineDataLoader()
        self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
        folder_path_ucf, file_name_ucf = m.get_model(
            UserKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                     file_name=file_name_ucf)
        self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
        folder_path_icf, file_name_icf = m.get_model(
            ItemKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                     file_name=file_name_icf)
        self.m_item_knn_cbf = ItemKNNCBFRecommender(
            self.URM_train, self.ICM)
        folder_path_icf, file_name_icf = m.get_model(
            ItemKNNCBFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cbf.loadModel(folder_path=folder_path_icf,
                                      file_name=file_name_icf)
        self.m_slim_mark1 = Slim_mark1(self.URM_train)
        folder_path_slim, file_name_slim = m.get_model(
            Slim_mark1.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                    file_name=file_name_slim)
        self.m_slim_mark2 = Slim_mark2(self.URM_train)
        folder_path_slim, file_name_slim = m.get_model(
            Slim_mark2.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_mark2.loadModel(folder_path=folder_path_slim,
                                    file_name=file_name_slim)
        self.m_alpha = P3alphaRecommender(self.URM_train)
        folder_path_alpha, file_name_alpha = m.get_model(
            P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_alpha.loadModel(folder_path=folder_path_alpha,
                               file_name=file_name_alpha)
        self.m_beta = RP3betaRecommender(self.URM_train)
        folder_path_beta, file_name_beta = m.get_model(
            RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_beta.loadModel(folder_path=folder_path_beta,
                              file_name=file_name_beta)
        self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
        folder_path_elastic, file_name_elastic = m.get_model(
            SLIMElasticNetRecommender.RECOMMENDER_NAME,
            training=self.submission)
        self.m_slim_elastic.loadModel(folder_path=folder_path_elastic,
                                      file_name=file_name_elastic)
        # self.m_cfw = CFWBoostingRecommender(self.URM_train,self.ICM,Slim_mark2,training=self.submission)
        # fold, file = m.get_model(CFWBoostingRecommender.RECOMMENDER_NAME,training= self.submission)
        # self.m_cfw.loadModel(folder_path=fold,file_name=file)
        # Normalize all similarities to float32 CSR for the blend.
        self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                         "csr", dtype=np.float32)
        self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                           "csr", dtype=np.float32)
        self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                         "csr", dtype=np.float32)
        # Slim mark1 exposes W, the other models expose W_sparse.
        self.W_sparse_Slim1 = check_matrix(self.m_slim_mark1.W,
                                           "csr", dtype=np.float32)
        self.W_sparse_Slim2 = check_matrix(self.m_slim_mark2.W_sparse,
                                           "csr", dtype=np.float32)
        self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                           "csr", dtype=np.float32)
        self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                          "csr", dtype=np.float32)
        self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse,
                                             "csr", dtype=np.float32)
        # self.W_sparse_cfw = check_matrix(self.m_cfw.W_sparse,"csr",dtype=np.float32)
        # Precomputations: weighted sum of all item-item similarities; the
        # user-based similarity is deliberately excluded ("wo_user").
        self.matrix_wo_user = self.alpha * self.W_sparse_URM_T +\
                              self.beta * self.W_sparse_ICM +\
                              self.gamma * self.W_sparse_Slim1 +\
                              self.delta * self.W_sparse_Slim2 +\
                              self.epsilon * self.W_sparse_alpha +\
                              self.zeta * self.W_sparse_beta + \
                              self.eta * self.W_sparse_elastic #+ \
                              #self.psi * self.W_sparse_cfw
        self.parameters = "alpha={}, beta={}, gamma={},delta={}, epsilon={}, zeta={}, eta={}, theta={}".format(
            self.alpha, self.beta, self.gamma, self.delta, self.epsilon,
            self.zeta, self.eta, self.theta)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
def fit(self,
        alpha=0.0500226666668111,
        beta=0.9996482062853596,
        gamma=0.36595766622100967,
        theta=0.22879224932897924,
        omega=0.5940982982110466,
        normalize=False,
        save_model=False,
        submission=False,
        best_parameters=False):
    """Tree-structured hybrid: combines six pre-trained similarities into
    three precomputed branch matrices used at scoring time.

    :param alpha: CBF vs. SLIM mix inside the first branch
    :param beta: first branch vs. item-CF mix
    :param gamma: P3alpha vs. RP3beta mix
    :param theta, omega: stored for use at scoring time
    :param submission: stored inverted — self.submission selects the
        training checkpoints when submission is False
    :param best_parameters: restore tuned weights from the parameter file
    """
    if best_parameters:
        m = OfflineDataLoader()
        folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path, file_name=file_name)
    else:
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.theta = theta
        self.omega = omega
    # NOTE(review): indentation reconstructed — loading below assumed common
    # to both branches above; confirm.
    self.normalize = normalize
    self.submission = not submission
    # Load the six pre-trained base models.
    m = OfflineDataLoader()
    self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
    folder_path_ucf, file_name_ucf = m.get_model(
        UserKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                 file_name=file_name_ucf)
    self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
    folder_path_icf, file_name_icf = m.get_model(
        ItemKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                 file_name=file_name_icf)
    self.m_item_knn_cbf = ItemKNNCBFRecommender(self.URM_train, self.ICM)
    folder_path_icbf, file_name_icbf = m.get_model(
        ItemKNNCBFRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_item_knn_cbf.loadModel(folder_path=folder_path_icbf,
                                  file_name=file_name_icbf)
    self.m_slim_mark1 = Slim_mark1(self.URM_train)
    folder_path_slim, file_name_slim = m.get_model(
        Slim_mark1.RECOMMENDER_NAME, training=self.submission)
    self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                file_name=file_name_slim)
    self.m_alpha = P3alphaRecommender(self.URM_train)
    folder_path_alpha, file_name_alpha = m.get_model(
        P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_alpha.loadModel(folder_path=folder_path_alpha,
                           file_name=file_name_alpha)
    self.m_beta = RP3betaRecommender(self.URM_train)
    folder_path_beta, file_name_beta = m.get_model(
        RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_beta.loadModel(folder_path=folder_path_beta,
                          file_name=file_name_beta)
    # Normalize all similarities to float32 CSR.
    self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                     "csr", dtype=np.float32)
    self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                     "csr", dtype=np.float32)
    self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                       "csr", dtype=np.float32)
    # Slim mark1 exposes W, the other models expose W_sparse.
    self.W_sparse_Slim = check_matrix(self.m_slim_mark1.W,
                                      "csr", dtype=np.float32)
    self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                       "csr", dtype=np.float32)
    self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                      "csr", dtype=np.float32)
    # Precomputations: convex combinations forming the tree branches.
    self.matrix_first_branch = self.alpha * self.W_sparse_ICM + (
        1 - self.alpha) * self.W_sparse_Slim
    self.matrix_right = self.beta * self.matrix_first_branch + (
        1 - self.beta) * self.W_sparse_URM_T
    self.matrix_alpha_beta = self.gamma * self.W_sparse_alpha + (
        1 - self.gamma) * self.W_sparse_beta
    self.parameters = "alpha={}, beta={}, gamma={}, omega={}, theta={}".format(
        self.alpha, self.beta, self.gamma, self.omega, self.theta)
    if save_model:
        self.saveModel("saved_models/submission/",
                       file_name="ItemTreeRecommender_offline")
def fit(self,
        alpha=0.1,
        beta=0.1,
        gamma=0.1,
        theta=0.1,
        delta=0.1,
        epsilon=0.1,
        normalize=False,
        save_model=False,
        submission=False,
        best_parameters=False,
        location="submission"):
    """Combo hybrid: loads six pre-trained HYBRID recommenders (party,
    pyramid, pyramid-item-tree, hybrid-eight, single-neuron, CFW boosting)
    whose scores are combined with the stored weights at scoring time.

    :param submission: stored inverted — self.submission selects the
        training checkpoints when submission is False
    :param best_parameters: restore tuned weights from the parameter file
    """
    if best_parameters:
        m = OfflineDataLoader()
        folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path, file_name=file_name)
    else:
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.theta = theta
        self.delta = delta
        self.epsilon = epsilon
    # NOTE(review): indentation reconstructed — loading below assumed common
    # to both branches above; confirm.
    self.normalize = normalize
    self.submission = not submission
    # Load the pre-trained hybrid sub-models.
    m = OfflineDataLoader()
    self.m_party = PartyRecommender_offline(self.URM_train)
    folder_path_ucf, file_name_ucf = m.get_model(
        PartyRecommender_offline.RECOMMENDER_NAME,
        training=self.submission)
    self.m_party.loadModel(folder_path=folder_path_ucf,
                           file_name=file_name_ucf)
    self.m_pyramid = PyramidRecommender_offline(self.URM_train)
    folder_path_icf, file_name_icf = m.get_model(
        PyramidRecommender_offline.RECOMMENDER_NAME,
        training=self.submission)
    self.m_pyramid.loadModel(folder_path=folder_path_icf,
                             file_name=file_name_icf)
    self.m_pyitem = PyramidItemTreeRecommender_offline(
        self.URM_train, self.ICM)
    folder_path_slim, file_name_slim = m.get_model(
        PyramidItemTreeRecommender_offline.RECOMMENDER_NAME,
        training=self.submission)
    self.m_pyitem.loadModel(folder_path=folder_path_slim,
                            file_name=file_name_slim)
    self.m_8 = HybridEightRecommender_offline(self.URM_train, self.ICM)
    folder_path_alpha, file_name_alpha = m.get_model(
        HybridEightRecommender_offline.RECOMMENDER_NAME,
        training=self.submission)
    self.m_8.loadModel(folder_path=folder_path_alpha,
                       file_name=file_name_alpha)
    self.m_sn = SingleNeuronRecommender_offline(self.URM_train, self.ICM)
    folder_path_alpha, file_name_alpha = m.get_model(
        SingleNeuronRecommender_offline.RECOMMENDER_NAME,
        training=self.submission)
    self.m_sn.loadModel(folder_path=folder_path_alpha,
                        file_name=file_name_alpha)
    # CFW boosting model is built around the Slim mark2 target similarity.
    self.m_cfw = CFWBoostingRecommender(self.URM_train,
                                        self.ICM,
                                        Slim_mark2,
                                        training=self.submission)
    fold, file = m.get_model(CFWBoostingRecommender.RECOMMENDER_NAME,
                             training=self.submission)
    self.m_cfw.loadModel(folder_path=fold, file_name=file)
    # epsilon is stored but not reported here.
    self.parameters = "alpha={}, beta={}, gamma={}, theta={},delta={} ".format(
        self.alpha, self.beta, self.gamma, self.theta, self.delta)
    if save_model:
        self.saveModel("saved_models/" + location + "/",
                       file_name=self.RECOMMENDER_NAME)
def extract_models(self, dataReader, submission=False):
    """Instantiate every recommender named in the config file.

    :param dataReader: data source for URM/ICM/UCM matrices
    :param submission: when True, train on the full URM instead of the
        training split
    :returns: list of recommender instances, in config order; entries with
        unrecognised model_name are silently skipped
    """
    print(
        "Configurator: The models are being extracted from the config file"
    )
    recsys = list()
    models = list(self.configs.models)
    data = dataReader.get_URM_train()
    if submission:
        data = dataReader.get_URM_all()
    # Dispatch on the configured model_name; each branch builds the
    # recommender with the options that model understands.
    for model in models:
        # User Collaborative Filtering with KNN
        if model["model_name"] == "user_knn_cf":
            recsys.append(
                UserKNNCFRecommender(
                    data, sparse_weights=model["sparse_weights"]))
        # Item Collaborative Filtering with KNN
        elif model["model_name"] == "item_knn_cf":
            recsys.append(
                ItemKNNCFRecommender(
                    data, sparse_weights=model["sparse_weights"]))
        # Item Content Based Filtering with KNN
        elif model["model_name"] == "item_knn_cbf":
            recsys.append(
                ItemKNNCBFRecommender(
                    data,
                    dataReader.get_ICM(),
                    sparse_weights=model["sparse_weights"]))
        # Slim BPR with Python
        elif model["model_name"] == "slim_bpr_python":
            recsys.append(
                Slim_BPR_Recommender_Python(
                    data,
                    positive_threshold=model["positive_threshold"],
                    sparse_weights=model["sparse_weights"]))
        # Slim BPR with Cython Extension
        elif model["model_name"] == "slim_bpr_mark1":
            recsys.append(
                Slim_mark1(data,
                           positive_threshold=model["positive_threshold"],
                           recompile_cython=model["recompile_cython"],
                           symmetric=model["symmetric"]))
        elif model["model_name"] == "slim_bpr_mark2":
            recsys.append(
                Slim_mark2(data,
                           positive_threshold=model["positive_threshold"],
                           recompile_cython=model["recompile_cython"],
                           symmetric=model["symmetric"]))
        # Funk SVD Recommender
        elif model["model_name"] == "funksvd":
            recsys.append(FunkSVD(data))
        elif model["model_name"] == "asysvd":
            recsys.append(AsySVD(data))
        elif model["model_name"] == "puresvd":
            recsys.append(PureSVDRecommender(data))
        elif model["model_name"] == "mf_bpr_cython":
            recsys.append(
                MF_BPR_Cython(data,
                              recompile_cython=model["recompile_cython"]))
        elif model["model_name"] == "mf_cython":
            recsys.append(
                MatrixFactorization_Cython(
                    data,
                    positive_threshold=model["positive_threshold"],
                    URM_validation=dataReader.get_URM_test(),
                    recompile_cython=model["recompile_cython"],
                    algorithm=model["algorithm"]))
        # NOTE(review): these two take no interaction data — confirm.
        elif model["model_name"] == "ials_numpy":
            recsys.append(IALS_numpy())
        elif model["model_name"] == "bprmf":
            recsys.append(BPRMF())
        elif model["model_name"] == "user_item_avg":
            recsys.append(
                UserItemAvgRecommender(
                    data,
                    dataReader.get_UCM(),
                    dataReader.get_ICM(),
                    sparse_weights=model["sparse_weights"],
                    verbose=model["verbose"],
                    similarity_mode=model["similarity_mode"],
                    normalize=model["normalize"],
                    alpha=model["alpha"]))
        elif model["model_name"] == "2levelhybrid":
            recsys.append(
                TwoLevelHybridRecommender(
                    data,
                    dataReader.get_UCM(),
                    dataReader.get_ICM(),
                    sparse_weights=model["sparse_weights"],
                    verbose=model["verbose"],
                    similarity_mode=model["similarity_mode"],
                    normalize=model["normalize"],
                    alpha=model["alpha"],
                    avg=model["avg"]))
        elif model["model_name"] == "seqrand":
            recsys.append(
                SeqRandRecommender(
                    data,
                    dataReader.get_URM_train_tfidf(),
                    dataReader.get_UCM(),
                    dataReader.get_ICM(),
                    dataReader.get_target_playlists_seq(),
                    sparse_weights=model["sparse_weights"],
                    verbose=model["verbose"],
                    similarity_mode=model["similarity_mode"],
                    normalize=model["normalize"],
                    alpha=model["alpha"],
                    beta=model["beta"],
                    gamma=model["gamma"]))
        elif model["model_name"] == "itemtree":
            recsys.append(
                ItemTreeRecommender(
                    data,
                    dataReader.get_URM_train_okapi(),
                    dataReader.get_ICM(),
                    sparse_weights=model["sparse_weights"]))
        elif model["model_name"] == "itemtree_offline":
            recsys.append(
                ItemTreeRecommender_offline(data, dataReader.get_ICM()))
        elif model["model_name"] == "slim":
            recsys.append(
                Slim(data,
                     sparse_weights=model["sparse_weights"],
                     normalize=model["normalize"]))
        elif model["model_name"] == "p3alpha":
            recsys.append(P3alphaRecommender(data))
        elif model["model_name"] == "rp3beta":
            recsys.append(RP3betaRecommender(data))
        elif model["model_name"] == "slim_elastic":
            recsys.append(SLIMElasticNetRecommender(data))
        elif model["model_name"] == "party":
            recsys.append(PartyRecommender_offline(data))
        elif model["model_name"] == "pyramid":
            recsys.append(PyramidRecommender_offline(data))
        elif model["model_name"] == "pyramid_item_tree":
            recsys.append(
                PyramidItemTreeRecommender_offline(data,
                                                   dataReader.get_ICM()))
        elif model["model_name"] == "hybrid_eight":
            recsys.append(
                HybridEightRecommender_offline(data, dataReader.get_ICM()))
        elif model["model_name"] == "combo":
            recsys.append(
                ComboRecommender_offline(data, dataReader.get_ICM()))
        elif model["model_name"] == "neuron":
            recsys.append(
                SingleNeuronRecommender_offline(data, dataReader.get_ICM()))
        elif model["model_name"] == "cfw":
            # NOTE(review): m and m1 are created but never used; the
            # commented-out lines show the originally intended pre-loading.
            m = OfflineDataLoader()
            #fold,file = m.get_model(Slim_mark2.RECOMMENDER_NAME,training=True)
            m1 = Slim_mark2(data)
            #m1.loadModel(folder_path=fold,file_name=file)
            recsys.append(
                CFWBoostingRecommender(data, dataReader.get_ICM(),
                                       Slim_mark2))
    print("Configurator: Models are extracted")
    return recsys
def fit(self,
        alpha=0.0029711141561171717,
        beta=0.9694720669481413,
        gamma=0.9635187725527589,
        theta=0.09930388487311004,
        omega=0.766047309541692,
        coeff = 5.4055892529064735,
        normalize=False,
        save_model=False,
        submission=False,
        best_parameters=False,
        location="submission"):
    """Pyramid hybrid: blends six pre-trained similarities; two convex
    combinations are precomputed, the rest (gamma/theta/omega/coeff) are
    stored for use at scoring time.

    :param submission: stored inverted — self.submission selects the
        training checkpoints when submission is False
    :param best_parameters: restore tuned weights from the parameter file
    """
    if best_parameters:
        m = OfflineDataLoader()
        folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path, file_name=file_name)
    else:
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.theta = theta
        self.omega = omega
        self.coeff = coeff
    # NOTE(review): indentation reconstructed — loading below assumed common
    # to both branches above; confirm.
    self.normalize = normalize
    self.submission = not submission
    # Load the six pre-trained base models.
    m = OfflineDataLoader()
    self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
    folder_path_ucf, file_name_ucf = m.get_model(UserKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf, file_name=file_name_ucf)
    self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
    folder_path_icf, file_name_icf = m.get_model(ItemKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_item_knn_cf.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)
    self.m_slim_mark2 = Slim_mark2(self.URM_train)
    folder_path_slim, file_name_slim = m.get_model(Slim_mark2.RECOMMENDER_NAME, training=self.submission)
    self.m_slim_mark2.loadModel(folder_path=folder_path_slim, file_name=file_name_slim)
    self.m_alpha = P3alphaRecommender(self.URM_train)
    folder_path_alpha, file_name_alpha = m.get_model(P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_alpha.loadModel(folder_path=folder_path_alpha, file_name=file_name_alpha)
    self.m_beta = RP3betaRecommender(self.URM_train)
    folder_path_beta, file_name_beta = m.get_model(RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_beta.loadModel(folder_path=folder_path_beta, file_name=file_name_beta)
    self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
    folder_path_elastic, file_name_elastic = m.get_model(SLIMElasticNetRecommender.RECOMMENDER_NAME, training=self.submission)
    self.m_slim_elastic.loadModel(folder_path=folder_path_elastic, file_name=file_name_elastic)
    # Normalize all similarities to float32 CSR.
    self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse, "csr", dtype=np.float32)
    self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse, "csr", dtype=np.float32)
    self.W_sparse_Slim = check_matrix(self.m_slim_mark2.W_sparse, "csr", dtype=np.float32)
    self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse, "csr", dtype=np.float32)
    self.W_sparse_beta = check_matrix(self.m_beta.W_sparse, "csr", dtype=np.float32)
    self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse, "csr", dtype=np.float32)
    # Precomputations
    self.matrix_alpha_beta = self.alpha * self.W_sparse_alpha + (1 - self.alpha) * self.W_sparse_beta
    self.matrix_level1 = self.beta * self.W_sparse_Slim + (1 - self.beta) * self.W_sparse_URM_T
    self.parameters = "alpha={}, beta={}, gamma={}, theta={}, omega={}, coeff={}".format(self.alpha, self.beta,
                                                                                         self.gamma, self.theta,
                                                                                         self.omega, self.coeff)
    if save_model:
        self.saveModel("saved_models/"+location+"/", file_name=self.RECOMMENDER_NAME)
def fit(self, epochs=100, logFile=None, batch_size=1000, lambda_i=1e-4,
        lambda_j=1e-4, learning_rate=0.025, topK=200, sgd_mode='adagrad',
        gamma=0.995, beta_1=0.9, beta_2=0.999, stop_on_validation=False,
        lower_validatons_allowed=5, validation_metric="MAP",
        evaluator_object=None, validation_every_n=1, save_model=False,
        best_parameters=False, offline=True, submission=False):
    """Train a SLIM-BPR model via the compiled Cython epoch loop, or load a
    previously trained one from disk when ``offline`` is True (the default).

    Parameters
    ----------
    epochs, batch_size, lambda_i, lambda_j, learning_rate, topK, sgd_mode,
    gamma, beta_1, beta_2 :
        SGD / regularization hyper-parameters forwarded to the Cython epoch.
    logFile :
        Unused in this method (kept for interface compatibility).
    stop_on_validation, lower_validatons_allowed, validation_metric,
    evaluator_object, validation_every_n :
        Early-stopping configuration (note: ``lower_validatons_allowed`` typo
        is part of the public interface, so it is preserved).
    save_model : bool
        Persist the fitted state under saved_models/submission/ when True.
    offline : bool
        When True, skip training and restore a saved model snapshot.
    submission : bool
        Chooses which snapshot to load (inverted into a *training* flag).
    """
    self.parameters = "epochs={0}, batch_size={1}, lambda_i={2}, lambda_j={3}, learning_rate={4}, topK={5}, sgd_mode={6" \
                      "}, gamma={7}, beta_1={8}, beta_2={9},".format(epochs, batch_size, lambda_i, lambda_j,
                                                                     learning_rate, topK, sgd_mode, gamma,
                                                                     beta_1, beta_2)
    if offline:
        # Skip training entirely: restore a previously trained model.
        m = OfflineDataLoader()
        folder, file = m.get_model(self.RECOMMENDER_NAME, training=(not submission))
        self.loadModel(folder_path=folder, file_name=file)
    else:
        # Import compiled module lazily so the extension is only required
        # when actually training.
        from models.Slim_mark2.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        # Select only positive interactions (>= threshold) for BPR sampling.
        URM_train_positive = self.URM_train.copy()
        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
        URM_train_positive.eliminate_zeros()
        # NOTE(review): URM_train_positive is built here but never used below;
        # the Cython epoch receives self.URM_mask instead. Confirm whether
        # self.URM_mask is derived from the same thresholding elsewhere
        # (e.g. in __init__), otherwise this is dead code.

        self.sgd_mode = sgd_mode
        self.epochs = epochs

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,  # hard-coded to 1; the batch_size argument is only stored below
            symmetric=self.symmetric,
            sgd_mode=sgd_mode,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        # NOTE(review): this check runs *after* topK was already handed to the
        # epoch constructor above; consider validating before constructing.
        if (topK != False and topK < 1):
            raise ValueError(
                "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                .format(topK))
        self.topK = topK

        if validation_every_n is not None:
            self.validation_every_n = validation_every_n
        else:
            # Effectively disables periodic validation.
            self.validation_every_n = np.inf

        if evaluator_object is None and stop_on_validation:
            # Default evaluator: cutoff-10 evaluation on the validation URM.
            evaluator_object = SequentialEvaluator(self.URM_validation, [10])

        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate

        # Run the epoch loop with early stopping on the chosen metric.
        self._train_with_early_stopping(epochs, validation_every_n,
                                        stop_on_validation, validation_metric,
                                        lower_validatons_allowed, evaluator_object,
                                        algorithm_name=self.RECOMMENDER_NAME)

        # Materialize the learned similarity into self.W / self.W_sparse.
        self.get_S_incremental_and_set_W()
        sys.stdout.flush()

    if save_model:
        self.saveModel("saved_models/submission/", file_name=self.RECOMMENDER_NAME)
def fit(self, l1_ratio=0.1, positive_only=True, topK=400, save_model=False,
        best_parameters=False, offline=False, submission=False):
    """Learn the SLIM item-item weight matrix, one ElasticNet regression per
    item column, or restore a previously fitted model when ``offline``.

    Each item's column of the URM is regressed on every *other* column
    (its own column is temporarily zeroed out), and only the strongest
    ``topK`` coefficients per item are kept in the final sparse matrix.
    """
    self.parameters = "l1_ratio= {}, topK= {},alpha= {},tol= {},max_iter= {}".format(
        l1_ratio, topK, 0.0001, 1e-4, 100)

    if offline:
        # Reuse a previously fitted model snapshot from disk.
        loader = OfflineDataLoader()
        folder, file = loader.get_model(self.RECOMMENDER_NAME, training=(not submission))
        self.loadModel(folder_path=folder, file_name=file)
    else:
        assert 0 <= l1_ratio <= 1, \
            "SLIM_ElasticNet: l1_ratio must be between 0 and 1, provided value was {}".format(l1_ratio)

        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK

        # A single ElasticNet instance, refit once per item column.
        self.model = ElasticNet(alpha=0.0001,
                                l1_ratio=self.l1_ratio,
                                positive=self.positive_only,
                                fit_intercept=False,
                                copy_X=False,
                                precompute=True,
                                selection='random',
                                max_iter=100,
                                tol=1e-4)

        URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)
        n_items = URM_train.shape[1]

        # Growable triplet buffers; preallocated arrays are far cheaper
        # than appending to Python lists at this scale.
        chunk = 10000000
        row_buf = np.zeros(chunk, dtype=np.int32)
        col_buf = np.zeros(chunk, dtype=np.int32)
        val_buf = np.zeros(chunk, dtype=np.float32)
        n_stored = 0

        t_start = time.time()
        t_last_print = t_start

        # Items are fitted sequentially (not in parallel).
        for item_idx in tqdm(range(n_items)):
            # Regression target: the item's own interaction column.
            target = URM_train[:, item_idx].toarray()

            # Blank the item's column so it cannot predict itself.
            lo = URM_train.indptr[item_idx]
            hi = URM_train.indptr[item_idx + 1]
            saved_column = URM_train.data[lo:hi].copy()
            URM_train.data[lo:hi] = 0.0

            # One ElasticNet model per column.
            self.model.fit(URM_train, target)

            coef_idx = self.model.sparse_coef_.indices
            coef_val = self.model.sparse_coef_.data

            # Keep only the strongest coefficients for this column.
            k = min(len(coef_val) - 1, self.topK)
            top_unsorted = (-coef_val).argpartition(k)[0:k]
            order = np.argsort(-coef_val[top_unsorted])
            ranking = top_unsorted[order]

            for pos in ranking:
                if n_stored == len(row_buf):
                    # Buffers full: extend each by another chunk.
                    row_buf = np.concatenate((row_buf, np.zeros(chunk, dtype=np.int32)))
                    col_buf = np.concatenate((col_buf, np.zeros(chunk, dtype=np.int32)))
                    val_buf = np.concatenate((val_buf, np.zeros(chunk, dtype=np.float32)))
                row_buf[n_stored] = coef_idx[pos]
                col_buf[n_stored] = item_idx
                val_buf[n_stored] = coef_val[pos]
                n_stored += 1

            # Restore the column that was blanked out above.
            URM_train.data[lo:hi] = saved_column

            if time.time() - t_last_print > 300 or item_idx == n_items - 1:
                print(
                    "Processed {} ( {:.2f}% ) in {:.2f} minutes. Items per second: {:.0f}"
                    .format(item_idx + 1,
                            100.0 * float(item_idx + 1) / n_items,
                            (time.time() - t_start) / 60,
                            float(item_idx) / (time.time() - t_start)))
                sys.stdout.flush()
                sys.stderr.flush()
                t_last_print = time.time()

        # Assemble the sparse item-item weight matrix from the triplets.
        self.W_sparse = sps.csr_matrix(
            (val_buf[:n_stored], (row_buf[:n_stored], col_buf[:n_stored])),
            shape=(n_items, n_items),
            dtype=np.float32)

    if save_model:
        self.saveModel("saved_models/submission/", file_name=self.RECOMMENDER_NAME)
def fit(self, topK=400, shrink=200, similarity='cosine', feature_weighting="BM25",
        normalize=True, save_model=False, best_parameters=False, offline=False,
        submission=False, location="submission", **similarity_args):
    """Fit the KNN similarity model on the URM, or restore a saved one.

    Parameters
    ----------
    topK, shrink, similarity, normalize, **similarity_args :
        Passed to Compute_Similarity (note: ``similarity`` is the metric
        *name* here and is rebound to the similarity object below).
    feature_weighting : {"none", "BM25", "TF-IDF"}
        Optional weighting applied to a float32 copy of the URM before
        computing similarities.
    best_parameters : bool
        Reload tuned hyper-parameters (via get_parameter) and use a fixed
        similarity_args dict instead of the arguments.
    offline : bool
        Skip fitting entirely and load a full saved model (via get_model).
    save_model, submission, location :
        Persistence options.

    Fixes over the previous revision:
    * the ``astype(np.float32)`` copy was discarded -- ``to_okapi`` /
      ``to_tfidf`` were applied to the original un-cast URM; they now
      receive the float32 copy as intended;
    * in the best_parameters branch with feature_weighting == "none",
      ``self.URM_train_copy`` was never assigned before use, which raised
      AttributeError; it now falls back to the raw URM.
    """
    if offline:
        m = OfflineDataLoader()
        folder_path_icf, file_name_icf = m.get_model(
            self.RECOMMENDER_NAME, training=(not submission))
        self.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)
    else:
        if best_parameters:
            # Restore tuned hyper-parameters, then rebuild the similarity
            # with the known-good argument set.
            m = OfflineDataLoader()
            folder_path_icf, file_name_icf = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)
            similarity_args = {
                'feature_weighting': 'BM25',
                'normalize': True,
                'shrink': 200,
                'similarity': 'cosine',
                'topK': 400
            }
            if self.feature_weighting == "none":
                # BUGFIX: previously URM_train_copy was left unset on this
                # path, crashing the Compute_Similarity call below.
                self.URM_train_copy = self.URM_train
            if self.feature_weighting == "BM25":
                self.URM_train_copy = self.URM_train.astype(np.float32)
                # BUGFIX: weight the float32 copy (the astype result used
                # to be discarded here).
                self.URM_train_copy = to_okapi(self.URM_train_copy)
            elif self.feature_weighting == "TF-IDF":
                self.URM_train_copy = self.URM_train.astype(np.float32)
                self.URM_train_copy = to_tfidf(self.URM_train_copy)
            similarity = Compute_Similarity(self.URM_train_copy, **similarity_args)
        else:
            self.topK = topK
            self.shrink = shrink
            self.feature_weighting = feature_weighting
            if self.feature_weighting == "BM25":
                self.URM_train_copy = self.URM_train.astype(np.float32)
                # BUGFIX: apply the weighting to the float32 copy.
                self.URM_train_copy = to_okapi(self.URM_train_copy)
            elif self.feature_weighting == "TF-IDF":
                self.URM_train_copy = self.URM_train.astype(np.float32)
                self.URM_train_copy = to_tfidf(self.URM_train_copy)
            if self.feature_weighting == "none":
                # No weighting: compute similarities on the raw URM.
                similarity = Compute_Similarity(self.URM_train,
                                                shrink=shrink,
                                                topK=topK,
                                                normalize=normalize,
                                                similarity=similarity,
                                                **similarity_args)
            else:
                similarity = Compute_Similarity(self.URM_train_copy,
                                                shrink=shrink,
                                                topK=topK,
                                                normalize=normalize,
                                                similarity=similarity,
                                                **similarity_args)

        self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, normalize={4}".format(
            self.sparse_weights, similarity, shrink, topK, normalize)

        if self.sparse_weights:
            self.W_sparse = similarity.compute_similarity()
        else:
            self.W = similarity.compute_similarity()
            self.W = self.W.toarray()

    if save_model:
        self.saveModel("saved_models/" + location + "/",
                       file_name=self.RECOMMENDER_NAME + "_" + location + "_model")
def fit(self, alpha=0.80849266253816, beta=0.7286503831547066,
        gamma=0.02895704968752022, sigma=0.453342, tau=0.542421,
        chi=1.8070865821028037, psi=4.256005405227253,
        omega=5.096018341419944, coeff=39.966898886531645,
        normalize=False, save_model=False, submission=False,
        best_parameters=False, offline=False, location="submission"):
    """Prepare the large hybrid by loading eight pre-trained base models and
    pre-combining several of their similarity matrices.

    Three paths:
    * ``offline``        -- load the whole fitted hybrid from disk;
    * ``best_parameters``-- reload tuned mixing weights, then load bases;
    * otherwise          -- take the mixing weights from the arguments.

    gamma, tau, chi, psi, omega are stored but not used in this method --
    presumably consumed at recommendation time (confirm in scoring code).

    NOTE(review): the source arrived with collapsed whitespace; the dedent
    points below are reconstructed.  The base-model loading and the
    precomputations sit inside the non-offline branch, after the
    best_parameters/manual split, because both of those paths need them.
    """
    if offline:
        # Entire fitted hybrid restored from disk; nothing else to do.
        m = OfflineDataLoader()
        folder_path, file_name = m.get_model(self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path, file_name=file_name)
    else:
        if best_parameters:
            # Restore tuned mixing weights saved by a parameter search.
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.sigma = sigma
            self.tau = tau
            self.chi = chi
            self.psi = psi
            self.omega = omega
            self.coeff = coeff
            self.normalize = normalize

        # Inverted on purpose: get_model(training=...) expects a training flag.
        self.submission = not submission
        m = OfflineDataLoader()

        # Load the eight pre-trained base recommenders from saved snapshots.
        self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
        folder_path_ucf, file_name_ucf = m.get_model(
            UserKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf, file_name=file_name_ucf)

        self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
        folder_path_icf, file_name_icf = m.get_model(
            ItemKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cf.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)

        # Content-based KNN also needs the ICM.
        self.m_item_knn_cbf = ItemKNNCBFRecommender(self.URM_train, self.ICM)
        folder_path_icf, file_name_icf = m.get_model(
            ItemKNNCBFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cbf.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)

        self.m_slim_mark1 = Slim_mark1(self.URM_train)
        folder_path_slim, file_name_slim = m.get_model(
            Slim_mark1.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_mark1.loadModel(folder_path=folder_path_slim, file_name=file_name_slim)

        self.m_slim_mark2 = Slim_mark2(self.URM_train)
        folder_path_slim, file_name_slim = m.get_model(
            Slim_mark2.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_mark2.loadModel(folder_path=folder_path_slim, file_name=file_name_slim)

        self.m_alpha = P3alphaRecommender(self.URM_train)
        folder_path_alpha, file_name_alpha = m.get_model(
            P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_alpha.loadModel(folder_path=folder_path_alpha, file_name=file_name_alpha)

        self.m_beta = RP3betaRecommender(self.URM_train)
        folder_path_beta, file_name_beta = m.get_model(
            RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_beta.loadModel(folder_path=folder_path_beta, file_name=file_name_beta)

        self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
        folder_path_elastic, file_name_elastic = m.get_model(
            SLIMElasticNetRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_elastic.loadModel(folder_path=folder_path_elastic, file_name=file_name_elastic)

        # Coerce every similarity matrix to CSR / float32 for the sparse
        # arithmetic below.  Note Slim_mark1 exposes a dense .W (not .W_sparse).
        self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_Slim1 = check_matrix(self.m_slim_mark1.W, "csr", dtype=np.float32)
        self.W_sparse_Slim2 = check_matrix(self.m_slim_mark2.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_beta = check_matrix(self.m_beta.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse, "csr", dtype=np.float32)

        # Precomputations reused at recommendation time:
        # alpha blends the two graph-based models; beta/sigma/coeff blend the
        # three SLIM variants (coeff rescales the ElasticNet weights).
        self.matrix_alpha_beta = self.alpha * self.W_sparse_alpha + (
            1 - self.alpha) * self.W_sparse_beta
        self.matrix_slim = self.beta * self.W_sparse_Slim2 + (
            (1 - self.beta) * self.W_sparse_elastic * self.coeff) + self.sigma * self.W_sparse_Slim1

        self.parameters = "alpha={}, beta={}, gamma={},sigma={}, tau={}, chi={}, psi={}, omega={}, coeff={}".format(
            self.alpha, self.beta, self.gamma, self.sigma, self.tau, self.chi,
            self.psi, self.omega, self.coeff)

    if save_model:
        self.saveModel("saved_models/" + location + "/", file_name=self.RECOMMENDER_NAME)
def fit(self, topK=100, alpha=1., min_rating=0, implicit=False,
        normalize_similarity=False, save_model=False, best_parameters=False,
        location="training"):
    """Build the P3alpha random-walk item similarity W = Piu^alpha * Pui^alpha,
    computed block-by-block to bound memory, keeping topK entries per row.

    When ``best_parameters`` is True the fitted state is restored from disk
    instead of being recomputed.
    """
    if best_parameters:
        loader = OfflineDataLoader()
        folder_alpha, file_alpha = loader.get_parameter(self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_alpha, file_name=file_alpha)
    else:
        self.topK = topK
        self.alpha = alpha
        self.normalize_similarity = normalize_similarity
        self.min_rating = min_rating
        self.implicit = implicit
        self.parameters = "alpha={}, min_rating={}, topk={}, implicit={}, normalize_similarity={})".format(
            self.alpha, self.min_rating, self.topK, self.implicit,
            self.normalize_similarity)

        # Threshold low ratings away (modifies the URM in place).
        if self.min_rating > 0:
            self.URM_train.data[self.URM_train.data < self.min_rating] = 0
            self.URM_train.eliminate_zeros()
        # Binarize the remaining interactions if requested.
        if self.implicit:
            self.URM_train.data = np.ones(self.URM_train.data.size, dtype=np.float32)

        # P(user -> item): row-normalized URM.
        Pui = normalize(self.URM_train, norm='l1', axis=1)

        # P(item -> user): the "boolean" URM transposed, then row-normalized
        # (axis stays 1 because the transpose happened first).
        urm_bool_t = self.URM_train.transpose(copy=True)
        urm_bool_t.data = np.ones(urm_bool_t.data.size, np.float32)
        Piu = normalize(urm_bool_t, norm='l1', axis=1)
        del urm_bool_t

        # Raise both transition matrices to the alpha power (skip the no-op).
        if self.alpha != 1.:
            Pui = Pui.power(self.alpha)
            Piu = Piu.power(self.alpha)

        # Final matrix is Piu * Pui; the product is unpacked into row blocks
        # for memory reasons.
        n_items = Pui.shape[1]
        block_rows = 200

        # Growable triplet buffers (arrays beat Python lists on memory here).
        chunk = 10000000
        row_buf = np.zeros(chunk, dtype=np.int32)
        col_buf = np.zeros(chunk, dtype=np.int32)
        val_buf = np.zeros(chunk, dtype=np.float32)
        n_stored = 0

        t_start = time.time()
        t_last_print = t_start

        for block_start in range(0, n_items, block_rows):
            # Last block may be shorter.
            if block_start + block_rows > n_items:
                block_rows = n_items - block_start

            sim_block = (Piu[block_start:block_start + block_rows, :] * Pui).toarray()

            for offset in range(block_rows):
                scores = sim_block[offset, :]
                scores[block_start + offset] = 0  # drop self-similarity

                # Stable descending selection of the topK candidates
                # (argsort()[::-1] kept deliberately for tie ordering).
                top_idx = scores.argsort()[::-1][:self.topK]
                keep = scores[top_idx] != 0.0
                keep_vals = scores[top_idx][keep]
                keep_cols = top_idx[keep]

                for j in range(len(keep_vals)):
                    if n_stored == len(row_buf):
                        # Buffers full: extend each by another chunk.
                        row_buf = np.concatenate((row_buf, np.zeros(chunk, dtype=np.int32)))
                        col_buf = np.concatenate((col_buf, np.zeros(chunk, dtype=np.int32)))
                        val_buf = np.concatenate((val_buf, np.zeros(chunk, dtype=np.float32)))
                    row_buf[n_stored] = block_start + offset
                    col_buf[n_stored] = keep_cols[j]
                    val_buf[n_stored] = keep_vals[j]
                    n_stored += 1

            if time.time() - t_last_print > 60:
                print("Processed {} ( {:.2f}% ) in {:.2f} minutes. Rows per second: {:.0f}".format(
                    block_start,
                    100.0 * float(block_start) / n_items,
                    (time.time() - t_start) / 60,
                    float(block_start) / (time.time() - t_start)))
                sys.stdout.flush()
                sys.stderr.flush()
                t_last_print = time.time()

        self.W_sparse = sps.coo_matrix(
            (val_buf[:n_stored], (row_buf[:n_stored], col_buf[:n_stored])),
            shape=(n_items, n_items))
        self.W_sparse = check_matrix(self.W_sparse, "csr", dtype=np.float32)

        if self.normalize_similarity:
            self.W_sparse = normalize(self.W_sparse, norm='l1', axis=1)

        if self.topK != False:
            self.W_sparse = similarityMatrixTopK(self.W_sparse, forceSparseOutput=True, k=self.topK)
            self.sparse_weights = True

    if save_model:
        self.saveModel("saved_models/" + location + "/",
                       file_name=self.RECOMMENDER_NAME + "_" + location + "_model")