def fit(self, epochs=30, logFile=None, URM_test=None, minRatingsPerUser=1,
        batch_size=1000, validate_every_N_epochs=1,
        start_validation_after_N_epochs=0, lambda_i=0.0025, lambda_j=0.00025,
        learning_rate=0.05, topK=False, sgd_mode='adagrad'):
    """Train the model using the compiled Cython epoch routine.

    The method stores in ``self.eligibleUsers`` the ids of the users that have
    at least one interaction whose value is >= ``self.positive_threshold``,
    builds the Cython epoch object and then delegates the training loop to
    ``fit_alreadyInitialized`` of the parent class.

    :param epochs: forwarded to ``fit_alreadyInitialized``
    :param logFile: forwarded to ``fit_alreadyInitialized``
    :param URM_test: forwarded to ``fit_alreadyInitialized``
    :param minRatingsPerUser: forwarded to ``fit_alreadyInitialized``
    :param batch_size: forwarded to ``fit_alreadyInitialized``; the Cython
        epoch object is always built with ``batch_size=1``
    :param validate_every_N_epochs: forwarded to ``fit_alreadyInitialized``
    :param start_validation_after_N_epochs: forwarded to ``fit_alreadyInitialized``
    :param lambda_i: forwarded to ``fit_alreadyInitialized`` only
    :param lambda_j: forwarded to ``fit_alreadyInitialized`` only
    :param learning_rate: given to both the Cython object and the parent fit
    :param topK: given to both the Cython object and the parent fit
    :param sgd_mode: stored in ``self.sgd_mode`` and given to the Cython object
    """

    # Select only positive interactions
    URM_train_positive = self.URM_train.copy()
    URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
    URM_train_positive.eliminate_zeros()

    # A user is eligible when its row still holds at least one stored entry.
    # Consecutive indptr differences give the number of entries per row, which
    # avoids slicing the indices array once per user in a Python loop.
    interactions_per_user = np.diff(URM_train_positive.indptr[:self.n_users + 1])
    self.eligibleUsers = np.flatnonzero(interactions_per_user > 0).astype(np.int64)

    self.sgd_mode = sgd_mode

    # Import compiled module
    from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

    # NOTE(review): lambda_i and lambda_j are not passed to the Cython object
    # here, only to fit_alreadyInitialized below - confirm this is intended.
    self.cythonEpoch = SLIM_BPR_Cython_Epoch(self.URM_mask,
                                             self.sparse_weights,
                                             self.eligibleUsers,
                                             topK=topK,
                                             learning_rate=learning_rate,
                                             batch_size=1,
                                             sgd_mode=sgd_mode)

    # Call super.fit to start training
    super(SLIM_BPR_Cython, self).fit_alreadyInitialized(
        epochs=epochs,
        logFile=logFile,
        URM_test=URM_test,
        minRatingsPerUser=minRatingsPerUser,
        batch_size=batch_size,
        validate_every_N_epochs=validate_every_N_epochs,
        start_validation_after_N_epochs=start_validation_after_N_epochs,
        lambda_i=lambda_i,
        lambda_j=lambda_j,
        learning_rate=learning_rate,
        topK=topK)
def fit(self, epochs=30, logFile=None, URM_test=None, filterTopPop=False,
        minRatingsPerUser=1, batch_size=1000, validate_every_N_epochs=1,
        start_validation_after_N_epochs=0, lambda_i=0.0, lambda_j=0.0,
        learning_rate=0.01, topK=200, sgd_mode='adagrad'):
    """Train the model using the compiled Cython epoch routine.

    Builds the Cython epoch object on ``self.URM_mask`` and delegates the
    training loop to ``fit_alreadyInitialized`` of the parent class.

    :param epochs: forwarded to ``fit_alreadyInitialized``
    :param logFile: forwarded to ``fit_alreadyInitialized``
    :param URM_test: forwarded to ``fit_alreadyInitialized``
    :param filterTopPop: forwarded to ``fit_alreadyInitialized``
    :param minRatingsPerUser: forwarded to ``fit_alreadyInitialized``
    :param batch_size: forwarded to ``fit_alreadyInitialized``; the Cython
        epoch object is always built with ``batch_size=1``
    :param validate_every_N_epochs: forwarded to ``fit_alreadyInitialized``
    :param start_validation_after_N_epochs: forwarded to ``fit_alreadyInitialized``
    :param lambda_i: given to the Cython object as ``li_reg`` and to the parent fit
    :param lambda_j: given to the Cython object as ``lj_reg`` and to the parent fit
    :param learning_rate: given to both the Cython object and the parent fit
    :param topK: given to both the Cython object and the parent fit
    :param sgd_mode: stored in ``self.sgd_mode`` and given to the Cython object
    """

    # The previous version also built a thresholded copy of URM_train here,
    # but never used it: the Cython object is trained on self.URM_mask.
    self.sgd_mode = sgd_mode

    # Import compiled module
    from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

    self.cythonEpoch = SLIM_BPR_Cython_Epoch(
        self.URM_mask,
        sparse_weights=self.sparse_weights,
        topK=topK,
        learning_rate=learning_rate,
        li_reg=lambda_i,
        lj_reg=lambda_j,
        batch_size=1,
        symmetric=self.symmetric,
        sgd_mode=sgd_mode)

    # Call super.fit to start training
    super(SLIM_BPR_Cython, self).fit_alreadyInitialized(
        epochs=epochs,
        logFile=logFile,
        URM_test=URM_test,
        filterTopPop=filterTopPop,
        minRatingsPerUser=minRatingsPerUser,
        batch_size=batch_size,
        validate_every_N_epochs=validate_every_N_epochs,
        start_validation_after_N_epochs=start_validation_after_N_epochs,
        lambda_i=lambda_i,
        lambda_j=lambda_j,
        learning_rate=learning_rate,
        topK=topK)
def fit(self, epochs=300, logFile=None, batch_size=1000, lambda_i=0.0,
        lambda_j=0.0, learning_rate=1e-4, topK=200, sgd_mode='adagrad',
        gamma=0.995, beta_1=0.9, beta_2=0.999, stop_on_validation=False,
        lower_validatons_allowed=5, validation_metric="MAP",
        evaluator_object=None, validation_every_n=1):
    """Train the model with the Cython epoch routine and early stopping.

    :param epochs: passed to ``_train_with_early_stopping``
    :param logFile: accepted for interface compatibility, not used here
    :param batch_size: stored in ``self.batch_size``; the Cython epoch object
        is always built with ``batch_size=1``
    :param lambda_i: given to the Cython object as ``li_reg``
    :param lambda_j: given to the Cython object as ``lj_reg``
    :param learning_rate: given to the Cython object
    :param topK: either False or a value >= 1
    :param sgd_mode: given to the Cython object
    :param gamma: given to the Cython object
    :param beta_1: given to the Cython object
    :param beta_2: given to the Cython object
    :param stop_on_validation: passed to ``_train_with_early_stopping``; when
        set and no evaluator is supplied, one is built on ``self.URM_validation``
    :param lower_validatons_allowed: passed to ``_train_with_early_stopping``
    :param validation_metric: passed to ``_train_with_early_stopping``
    :param evaluator_object: evaluator used for validation, may be None
    :param validation_every_n: validation period; None is replaced by ``np.inf``
    :raises ValueError: if ``topK`` is neither False nor >= 1
    """

    # Validate before building anything expensive. `!=` (not `is not`) is kept
    # on purpose: 0 compares equal to False and was accepted before as well.
    if (topK != False and topK < 1):
        raise ValueError(
            "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
            .format(topK))

    # Import compiled module
    from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

    # The previous version also built a thresholded copy of URM_train here,
    # but never used it: the Cython object is trained on self.URM_mask.
    self.sgd_mode = sgd_mode
    self.epochs = epochs
    self.topK = topK
    self.batch_size = batch_size
    self.lambda_i = lambda_i
    self.lambda_j = lambda_j
    self.learning_rate = learning_rate

    if validation_every_n is not None:
        self.validation_every_n = validation_every_n
    else:
        self.validation_every_n = np.inf

    self.cythonEpoch = SLIM_BPR_Cython_Epoch(
        self.URM_mask,
        train_with_sparse_weights=self.train_with_sparse_weights,
        final_model_sparse_weights=self.sparse_weights,
        topK=topK,
        learning_rate=learning_rate,
        li_reg=lambda_i,
        lj_reg=lambda_j,
        batch_size=1,
        symmetric=self.symmetric,
        sgd_mode=sgd_mode,
        gamma=gamma,
        beta_1=beta_1,
        beta_2=beta_2)

    if evaluator_object is None and stop_on_validation:
        evaluator_object = SequentialEvaluator(self.URM_validation, [5])

    # Pass the normalised period: the raw argument may be None, which is the
    # very case the np.inf fallback above was written for.
    # NOTE(review): assumes _train_with_early_stopping only uses the period
    # arithmetically - confirm against its implementation.
    self._train_with_early_stopping(epochs,
                                    self.validation_every_n,
                                    stop_on_validation,
                                    validation_metric,
                                    lower_validatons_allowed,
                                    evaluator_object,
                                    algorithm_name=self.RECOMMENDER_NAME)

    self.get_S_incremental_and_set_W()

    sys.stdout.flush()
class SLIM_BPR_Cython(SimilarityMatrixRecommender, Recommender, Incremental_Training_Early_Stopping):
    """SLIM BPR recommender whose training epochs run in a compiled Cython module."""

    RECOMMENDER_NAME = "SLIM_BPR_Recommender"

    def __init__(self, URM_train, positive_threshold=4, URM_validation=None,
                 recompile_cython=False, final_model_sparse_weights=True,
                 train_with_sparse_weights=False, symmetric=True):
        """Store the training data and build the mask of positive interactions.

        :param URM_train: user-rating matrix; a copy is stored
        :param positive_threshold: interactions with value >= this are kept in ``URM_mask``
        :param URM_validation: optional validation matrix; a copy is stored
        :param recompile_cython: when True the Cython module is recompiled
        :param final_model_sparse_weights: stored in ``self.sparse_weights``
        :param train_with_sparse_weights: when True ``self.sparse_weights`` is forced to True
        :param symmetric: given to the Cython epoch object at fit time
        """
        super(SLIM_BPR_Cython, self).__init__()

        self.URM_train = URM_train.copy()
        self.n_users = URM_train.shape[0]
        self.n_items = URM_train.shape[1]
        self.normalize = False
        self.positive_threshold = positive_threshold

        self.train_with_sparse_weights = train_with_sparse_weights
        self.sparse_weights = final_model_sparse_weights

        if URM_validation is not None:
            self.URM_validation = URM_validation.copy()
        else:
            self.URM_validation = None

        if self.train_with_sparse_weights:
            self.sparse_weights = True

        # Keep only the interactions at or above the positive threshold
        self.URM_mask = self.URM_train.copy()
        self.URM_mask.data = self.URM_mask.data >= self.positive_threshold
        self.URM_mask.eliminate_zeros()

        assert self.URM_mask.nnz > 0, "SLIM_BPR_Cython: URM_train_positive is empty, positive threshold is too high"

        self.symmetric = symmetric

        if not self.train_with_sparse_weights:
            n_items = URM_train.shape[1]
            # 8 bytes per cell of an n_items x n_items matrix, expressed in MB
            required_mb = 8 * n_items**2 / 1e+06

            if symmetric:
                required_mb /= 2

            print(
                "SLIM_BPR_Cython: Estimated memory required for similarity matrix of {} items is {:.2f} MB"
                .format(n_items, required_mb))

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self, epochs=300, logFile=None, batch_size=1000, lambda_i=0.0,
            lambda_j=0.0, learning_rate=1e-4, topK=200, sgd_mode='adagrad',
            gamma=0.995, beta_1=0.9, beta_2=0.999, stop_on_validation=False,
            lower_validatons_allowed=5, validation_metric="MAP",
            evaluator_object=None, validation_every_n=1):
        """Train the model with the Cython epoch routine and early stopping.

        :param epochs: passed to ``_train_with_early_stopping``
        :param logFile: accepted for interface compatibility, not used here
        :param batch_size: stored in ``self.batch_size``; the Cython epoch
            object is always built with ``batch_size=1``
        :param lambda_i: given to the Cython object as ``li_reg``
        :param lambda_j: given to the Cython object as ``lj_reg``
        :param topK: either False or a value >= 1
        :param stop_on_validation: when set and no evaluator is supplied, one
            is built on ``self.URM_validation``
        :param validation_every_n: validation period; None is replaced by ``np.inf``
        :raises ValueError: if ``topK`` is neither False nor >= 1

        The remaining parameters are handed unchanged to the Cython object or
        to ``_train_with_early_stopping``.
        """

        # Validate before building anything expensive. `!=` (not `is not`) is
        # kept on purpose: 0 compares equal to False and was accepted before.
        if (topK != False and topK < 1):
            raise ValueError(
                "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                .format(topK))

        # Import compiled module
        from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        # The previous version also re-built a thresholded copy of URM_train
        # here but never used it: training runs on self.URM_mask.
        self.sgd_mode = sgd_mode
        self.epochs = epochs
        self.topK = topK
        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate

        if validation_every_n is not None:
            self.validation_every_n = validation_every_n
        else:
            self.validation_every_n = np.inf

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,
            sgd_mode=sgd_mode,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [5])

        # Pass the normalised period: the raw argument may be None, which is
        # the very case the np.inf fallback above was written for.
        # NOTE(review): assumes _train_with_early_stopping only uses the
        # period arithmetically - confirm against its implementation.
        self._train_with_early_stopping(epochs,
                                        self.validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.RECOMMENDER_NAME)

        self.get_S_incremental_and_set_W()

        sys.stdout.flush()

    def _initialize_incremental_model(self):
        """Read the current similarity from the Cython object and keep a copy as best model."""
        self.S_incremental = self.cythonEpoch.get_S()
        self.S_best = self.S_incremental.copy()

    def _update_incremental_model(self):
        """Refresh ``S_incremental`` and the recommender weights."""
        self.get_S_incremental_and_set_W()

    def _update_best_model(self):
        """Store a copy of the current similarity as the best model."""
        self.S_best = self.S_incremental.copy()

    def _run_epoch(self, num_epoch):
        """Run one training epoch in the Cython object (``num_epoch`` is unused)."""
        self.cythonEpoch.epochIteration_Cython()

    def get_S_incremental_and_set_W(self):
        """Fetch the similarity from the Cython object and expose it as ``W_sparse`` or ``W``."""
        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:
            self.W_sparse = self.S_incremental
        elif self.sparse_weights:
            self.W_sparse = similarityMatrixTopK(self.S_incremental, k=self.topK)
        else:
            self.W = self.S_incremental

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):
        """Print the current hyper-parameters and results, and append them to ``logFile`` if given."""
        current_config = {
            'lambda_i': self.lambda_i,
            'lambda_j': self.lambda_j,
            'batch_size': self.batch_size,
            'learn_rate': self.learning_rate,
            'topK_similarity': self.topK,
            'epoch': currentEpoch
        }

        print("Test case: {}\nResults {}\n".format(current_config, results_run))
        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(
                current_config, results_run))
            logFile.flush()

    def runCompilationScript(self):
        """Compile the Cython module inside its own subfolder of the working directory.

        :raises subprocess.CalledProcessError: if the build command fails
        """
        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root
        compiledModuleSubfolder = "/SLIM_BPR/Cython"
        fileToCompile_list = ['SLIM_BPR_Cython_Epoch.pyx']
        working_dir = os.getcwd() + compiledModuleSubfolder

        for fileToCompile in fileToCompile_list:
            # Command: python compileCython.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace
            command = [
                'python', 'compileCython.py', fileToCompile, 'build_ext',
                '--inplace'
            ]
            subprocess.check_output(' '.join(command), shell=True, cwd=working_dir)

            # The html annotation report (cython -a) is optional: a failure is
            # reported but must not abort the compilation.
            try:
                command = ['cython', fileToCompile, '-a']
                subprocess.check_output(' '.join(command), shell=True, cwd=working_dir)
            except (subprocess.CalledProcessError, OSError) as exc:
                print("Cython annotation report not generated: {}".format(exc))

        print("Compiled module saved in subfolder: {}".format(
            compiledModuleSubfolder))
def fit(self, epochs=300, positive_threshold_BPR=None,
        train_with_sparse_weights=None, symmetric=True, random_seed=None,
        batch_size=1000, lambda_i=0.01, lambda_j=0.001, learning_rate=1e-4,
        topK=200, sgd_mode='adagrad', gamma=0.995, beta_1=0.9, beta_2=0.999,
        **earlystopping_kwargs):
    """Train the model with the Cython epoch routine and early stopping.

    :param epochs: passed to ``_train_with_early_stopping``
    :param positive_threshold_BPR: when not None, only interactions with
        value >= this threshold are used for training
    :param train_with_sparse_weights: True/False to force the train mode,
        None to choose it from the RAM status
    :param symmetric: given to the Cython object
    :param random_seed: given to the Cython object
    :param batch_size: stored in ``self.batch_size``; the Cython epoch object
        is always built with ``batch_size=1``
    :param lambda_i: given to the Cython object as ``li_reg``
    :param lambda_j: given to the Cython object as ``lj_reg``
    :param learning_rate: given to the Cython object
    :param topK: either False or a value >= 1
    :param sgd_mode: given to the Cython object
    :param gamma: given to the Cython object
    :param beta_1: given to the Cython object
    :param beta_2: given to the Cython object
    :param earlystopping_kwargs: forwarded to ``_train_with_early_stopping``
    :raises ValueError: if ``topK`` is neither False nor >= 1
    """

    # Validate before building anything expensive. `!=` (not `is not`) is kept
    # on purpose: 0 compares equal to False and was accepted before as well.
    if (topK != False and topK < 1):
        raise ValueError(
            "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
            .format(topK))

    # Import compiled module
    import pyximport
    pyximport.install()
    from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

    self.symmetric = symmetric
    self.train_with_sparse_weights = train_with_sparse_weights

    if self.train_with_sparse_weights is None:
        # auto select
        required_m = estimate_required_MB(self.n_items, self.symmetric)
        total_m, _, available_m = get_RAM_status()

        if total_m is not None:
            string = "Automatic selection of fastest train mode. Available RAM is {:.2f} MB ({:.2f}%) of {:.2f} MB, required is {:.2f} MB. ".format(
                available_m, available_m / total_m * 100, total_m, required_m)
        else:
            string = "Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. "

        # Written as a product instead of required_m / available_m so that a
        # report of 0 MB available selects the sparse mode instead of raising
        # ZeroDivisionError.
        if total_m is None or required_m < self.free_mem_threshold * available_m:
            print(string + "Using dense matrix.")
            self.train_with_sparse_weights = False
        else:
            print(string + "Using sparse matrix.")
            self.train_with_sparse_weights = True

    # Select only positive interactions
    URM_train_positive = self.URM_train.copy()

    self.positive_threshold_BPR = positive_threshold_BPR
    self.sgd_mode = sgd_mode
    self.epochs = epochs

    if self.positive_threshold_BPR is not None:
        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold_BPR
        URM_train_positive.eliminate_zeros()

        assert URM_train_positive.nnz > 0, "SLIM_BPR_Cython: URM_train_positive is empty, positive threshold is too high"

    self.topK = topK
    self.batch_size = batch_size
    self.lambda_i = lambda_i
    self.lambda_j = lambda_j
    self.learning_rate = learning_rate

    self.cythonEpoch = SLIM_BPR_Cython_Epoch(
        URM_train_positive,
        train_with_sparse_weights=self.train_with_sparse_weights,
        final_model_sparse_weights=True,
        topK=topK,
        learning_rate=learning_rate,
        li_reg=lambda_i,
        lj_reg=lambda_j,
        batch_size=1,
        symmetric=self.symmetric,
        sgd_mode=sgd_mode,
        verbose=self.verbose,
        random_seed=random_seed,
        gamma=gamma,
        beta_1=beta_1,
        beta_2=beta_2)

    # Release the Cython object's resources even when training raises.
    try:
        self.S_incremental = self.cythonEpoch.get_S()
        self.S_best = self.S_incremental.copy()

        self._train_with_early_stopping(epochs,
                                        algorithm_name=self.RECOMMENDER_NAME,
                                        **earlystopping_kwargs)

        self.get_S_incremental_and_set_W()
    finally:
        self.cythonEpoch._dealloc()

    sys.stdout.flush()
class SLIM_BPR_Cython(BaseItemSimilarityMatrixRecommender, Incremental_Training_Early_Stopping):
    """SLIM BPR recommender whose training epochs run in a compiled Cython module."""

    RECOMMENDER_NAME = "SLIM_BPR_Recommender"

    def __init__(self, URM_train, verbose=True, free_mem_threshold=0.5, recompile_cython=False):
        """Store the configuration and optionally recompile the Cython module.

        :param URM_train: user-rating matrix, handed to the parent constructor
        :param verbose: given to the Cython epoch object at fit time
        :param free_mem_threshold: value in [0, 1] used by ``fit`` when the
            train mode is selected automatically
        :param recompile_cython: when True the Cython module is recompiled
        """
        super(SLIM_BPR_Cython, self).__init__(URM_train)

        assert free_mem_threshold >= 0.0 and free_mem_threshold <= 1.0, "SLIM_BPR_Recommender: free_mem_threshold must be between 0.0 and 1.0, provided was '{}'".format(
            free_mem_threshold)

        self.n_users, self.n_items = self.URM_train.shape

        self.free_mem_threshold = free_mem_threshold
        self.verbose = verbose

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self, epochs=300, positive_threshold_BPR=None,
            train_with_sparse_weights=None, symmetric=True, random_seed=None,
            batch_size=1000, lambda_i=0.01, lambda_j=0.001,
            learning_rate=1e-4, topK=200, sgd_mode='adagrad', gamma=0.995,
            beta_1=0.9, beta_2=0.999, **earlystopping_kwargs):
        """Train the model with the Cython epoch routine and early stopping.

        :param epochs: passed to ``_train_with_early_stopping``
        :param positive_threshold_BPR: when not None, only interactions with
            value >= this threshold are used for training
        :param train_with_sparse_weights: True/False to force the train mode,
            None to choose it from the RAM status
        :param batch_size: stored in ``self.batch_size``; the Cython epoch
            object is always built with ``batch_size=1``
        :param lambda_i: given to the Cython object as ``li_reg``
        :param lambda_j: given to the Cython object as ``lj_reg``
        :param topK: either False or a value >= 1
        :param earlystopping_kwargs: forwarded to ``_train_with_early_stopping``
        :raises ValueError: if ``topK`` is neither False nor >= 1

        The remaining parameters are handed unchanged to the Cython object.
        """

        # Validate before building anything expensive. `!=` (not `is not`) is
        # kept on purpose: 0 compares equal to False and was accepted before.
        if (topK != False and topK < 1):
            raise ValueError(
                "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                .format(topK))

        # Import compiled module
        import pyximport
        pyximport.install()
        from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        self.symmetric = symmetric
        self.train_with_sparse_weights = train_with_sparse_weights

        if self.train_with_sparse_weights is None:
            # auto select
            required_m = estimate_required_MB(self.n_items, self.symmetric)
            total_m, _, available_m = get_RAM_status()

            if total_m is not None:
                string = "Automatic selection of fastest train mode. Available RAM is {:.2f} MB ({:.2f}%) of {:.2f} MB, required is {:.2f} MB. ".format(
                    available_m, available_m / total_m * 100, total_m, required_m)
            else:
                string = "Automatic selection of fastest train mode. Unable to get current RAM status, you may be using a non-Linux operating system. "

            # Written as a product instead of required_m / available_m so that
            # a report of 0 MB available selects the sparse mode instead of
            # raising ZeroDivisionError.
            if total_m is None or required_m < self.free_mem_threshold * available_m:
                print(string + "Using dense matrix.")
                self.train_with_sparse_weights = False
            else:
                print(string + "Using sparse matrix.")
                self.train_with_sparse_weights = True

        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()

        self.positive_threshold_BPR = positive_threshold_BPR
        self.sgd_mode = sgd_mode
        self.epochs = epochs

        if self.positive_threshold_BPR is not None:
            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold_BPR
            URM_train_positive.eliminate_zeros()

            assert URM_train_positive.nnz > 0, "SLIM_BPR_Cython: URM_train_positive is empty, positive threshold is too high"

        self.topK = topK
        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            URM_train_positive,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=True,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,
            sgd_mode=sgd_mode,
            verbose=self.verbose,
            random_seed=random_seed,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        # Release the Cython object's resources even when training raises.
        try:
            self.S_incremental = self.cythonEpoch.get_S()
            self.S_best = self.S_incremental.copy()

            self._train_with_early_stopping(epochs,
                                            algorithm_name=self.RECOMMENDER_NAME,
                                            **earlystopping_kwargs)

            self.get_S_incremental_and_set_W()
        finally:
            self.cythonEpoch._dealloc()

        sys.stdout.flush()

    def _prepare_model_for_validation(self):
        """Refresh ``S_incremental`` and ``W_sparse`` from the Cython object."""
        self.get_S_incremental_and_set_W()

    def _update_best_model(self):
        """Store a copy of the current similarity as the best model."""
        self.S_best = self.S_incremental.copy()

    def _run_epoch(self, num_epoch):
        """Run one training epoch in the Cython object (``num_epoch`` is unused)."""
        self.cythonEpoch.epochIteration_Cython()

    def get_S_incremental_and_set_W(self):
        """Fetch the similarity from the Cython object and store it in ``W_sparse`` as csr."""
        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:
            weights = self.S_incremental
        else:
            # Dense training: keep only the topK entries
            weights = similarityMatrixTopK(self.S_incremental, k=self.topK)

        self.W_sparse = check_matrix(weights, format='csr')

    def runCompilationScript(self):
        """Compile the Cython module inside its own subfolder."""
        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root
        file_subfolder = "/SLIM_BPR/Cython"
        file_to_compile_list = ['SLIM_BPR_Cython_Epoch.pyx']

        run_compile_subprocess(file_subfolder, file_to_compile_list)

        print("{}: Compiled module {} in subfolder: {}".format(
            self.RECOMMENDER_NAME, file_to_compile_list, file_subfolder))
def fit(self, epochs=300, logFile=None, batch_size=1000, lambda_i=0.1,
        lambda_j=0.1, learning_rate=1e-3, topK=200, sgd_mode='adagrad',
        gamma=0.995, beta_1=0.9, beta_2=0.999, stop_on_validation=False,
        lower_validatons_allowed=2, validation_metric="MAP",
        evaluator_object=None, validation_every_n=50,
        old_similarity_matrix=None, force_compute_sim=True):
    '''
    Train the model, or load a previously saved similarity matrix.

    The trained ``W_sparse`` is pickled under IntermediateComputations/SLIM_BPR
    with a file name derived from the size of URM_train and the hyper-parameters.

    :param epochs: max number of epochs
    :param logFile: accepted for interface compatibility, not used here
    :param batch_size: stored in ``self.batch_size``; the Cython epoch object
        is always built with ``batch_size=1``
    :param lambda_i: first regularizer
    :param lambda_j: second regularizer
    :param learning_rate: given to the Cython object
    :param topK: either False or a value >= 1
    :param sgd_mode: given to the Cython object
    :param gamma: given to the Cython object
    :param beta_1: given to the Cython object
    :param beta_2: given to the Cython object
    :param stop_on_validation: should I stop after some validations?
    :param lower_validatons_allowed: stop after n validations worse than the previous one
    :param validation_metric: passed to ``_train_with_early_stopping``
    :param evaluator_object: evaluator used for validation, may be None
    :param validation_every_n: how often to validate; None is replaced by ``np.inf``
    :param old_similarity_matrix: if you want to start from a fixed similarity matrix
    :param force_compute_sim: when False a saved matrix is used if one exists
    :return: None
    :raises ValueError: if ``topK`` is neither False nor >= 1
    '''

    # Validate before touching the cache or building anything expensive.
    # `!=` (not `is not`) is kept on purpose: 0 compares equal to False and
    # was accepted before as well.
    if topK != False and topK < 1:
        raise ValueError(
            "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
            .format(topK))

    self.lambda_i = lambda_i
    self.lambda_j = lambda_j
    self.learning_rate = learning_rate
    self.topK = topK
    self.epochs = epochs

    # Built once and used for both the lookup and the final save
    cache_path = os.path.join(
        "IntermediateComputations", "SLIM_BPR",
        "totURM={}_topK={}_lambdai={}_lambdaj={}_lr={}_epochs={}.pkl"
        .format(str(len(self.URM_train.data)), str(self.topK),
                str(self.lambda_i), str(self.lambda_j),
                str(self.learning_rate), str(self.epochs)))

    if not force_compute_sim:
        try:
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only safe while this folder holds files written by this method.
            with open(cache_path, 'rb') as handle:
                W_sparse_new = pickle.load(handle)
        except FileNotFoundError:
            pass
        else:
            self.W_sparse = W_sparse_new
            print("Saved SLIM Matrix Used!")
            return

    if evaluator_object is None and stop_on_validation:
        print("Creating evaluator object for SLIM BPR")
        evaluator_object = SequentialEvaluator(self.URM_validation,
                                               self.URM_train)

    # Import compiled module
    from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

    # The previous version also built a thresholded copy of URM_train here,
    # but never used it: the Cython object is trained on self.URM_mask.
    self.sgd_mode = sgd_mode
    self.batch_size = batch_size

    if validation_every_n is not None:
        self.validation_every_n = validation_every_n
    else:
        self.validation_every_n = np.inf

    self.cythonEpoch = SLIM_BPR_Cython_Epoch(
        self.URM_mask,
        old_similarity=old_similarity_matrix,
        train_with_sparse_weights=self.train_with_sparse_weights,
        final_model_sparse_weights=self.sparse_weights,
        topK=topK,
        learning_rate=learning_rate,
        li_reg=lambda_i,
        lj_reg=lambda_j,
        batch_size=1,
        symmetric=self.symmetric,  # symmetric by default
        sgd_mode=sgd_mode,
        gamma=gamma,
        beta_1=beta_1,
        beta_2=beta_2)

    # Pass the normalised period: the raw argument may be None, which is the
    # very case the np.inf fallback above was written for.
    # NOTE(review): assumes _train_with_early_stopping only uses the period
    # arithmetically - confirm against its implementation.
    self._train_with_early_stopping(epochs,
                                    self.validation_every_n,
                                    stop_on_validation,
                                    validation_metric,
                                    lower_validatons_allowed,
                                    evaluator_object,
                                    algorithm_name=self.RECOMMENDER_NAME)

    self.get_S_incremental_and_set_W()

    # Create the cache folder if needed, so a finished training is not lost
    # to a FileNotFoundError at save time.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    with open(cache_path, 'wb') as handle:
        pickle.dump(self.W_sparse, handle, protocol=pickle.HIGHEST_PROTOCOL)

    sys.stdout.flush()
class SLIM_BPR_Cython(SimilarityMatrixRecommender, Recommender, Incremental_Training_Early_Stopping):
    """SLIM BPR recommender trained in Cython, with an on-disk cache of the learned matrix."""

    RECOMMENDER_NAME = "SLIM_BPR_Recommender"

    def __init__(self, URM_train, positive_threshold=1, URM_validation=None,
                 recompile_cython=False, final_model_sparse_weights=True,
                 train_with_sparse_weights=False, symmetric=True):
        """Store the training data and build the mask of positive interactions.

        :param URM_train: user-rating matrix; a copy is stored
        :param positive_threshold: interactions with value >= this are kept in ``URM_mask``
        :param URM_validation: optional validation matrix; a copy is stored
        :param recompile_cython: when True the Cython module is recompiled
        :param final_model_sparse_weights: stored in ``self.sparse_weights``
        :param train_with_sparse_weights: when True ``self.sparse_weights`` is forced to True
        :param symmetric: given to the Cython epoch object at fit time
        """
        super(SLIM_BPR_Cython, self).__init__()

        self.URM_train = URM_train.copy()
        self.n_users = URM_train.shape[0]
        self.n_items = URM_train.shape[1]
        self.normalize = False
        self.positive_threshold = positive_threshold

        self.train_with_sparse_weights = train_with_sparse_weights
        self.sparse_weights = final_model_sparse_weights

        if URM_validation is not None:
            self.URM_validation = URM_validation.copy()
        else:
            self.URM_validation = None

        if self.train_with_sparse_weights:
            self.sparse_weights = True

        # Keep only the interactions at or above the positive threshold
        self.URM_mask = self.URM_train.copy()
        self.URM_mask.data = self.URM_mask.data >= self.positive_threshold
        self.URM_mask.eliminate_zeros()

        assert self.URM_mask.nnz > 0, "SLIM_BPR_Cython: URM_train_positive is empty, positive threshold is too high"

        self.symmetric = symmetric

        if not self.train_with_sparse_weights:
            n_items = URM_train.shape[1]
            # 8 bytes per cell of an n_items x n_items matrix, expressed in MB
            required_mb = 8 * n_items**2 / 1e+06

            if symmetric:
                required_mb /= 2

            print(
                "SLIM_BPR_Cython: Estimated memory required for similarity matrix of {} items is {:.2f} MB"
                .format(n_items, required_mb))

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def _similarity_cache_path(self):
        """Return the pickle path derived from URM_train size and current hyper-parameters."""
        return os.path.join(
            "IntermediateComputations", "SLIM_BPR",
            "totURM={}_topK={}_lambdai={}_lambdaj={}_lr={}_epochs={}.pkl"
            .format(str(len(self.URM_train.data)), str(self.topK),
                    str(self.lambda_i), str(self.lambda_j),
                    str(self.learning_rate), str(self.epochs)))

    def fit(self, epochs=300, logFile=None, batch_size=1000, lambda_i=0.1,
            lambda_j=0.1, learning_rate=1e-3, topK=200, sgd_mode='adagrad',
            gamma=0.995, beta_1=0.9, beta_2=0.999, stop_on_validation=False,
            lower_validatons_allowed=2, validation_metric="MAP",
            evaluator_object=None, validation_every_n=50,
            old_similarity_matrix=None, force_compute_sim=True):
        '''
        Train the model, or load a previously saved similarity matrix.

        :param epochs: max number of epochs
        :param logFile: accepted for interface compatibility, not used here
        :param batch_size: stored in ``self.batch_size``; the Cython epoch
            object is always built with ``batch_size=1``
        :param lambda_i: first regularizer
        :param lambda_j: second regularizer
        :param learning_rate: given to the Cython object
        :param topK: either False or a value >= 1
        :param sgd_mode: given to the Cython object
        :param gamma: given to the Cython object
        :param beta_1: given to the Cython object
        :param beta_2: given to the Cython object
        :param stop_on_validation: should I stop after some validations?
        :param lower_validatons_allowed: stop after n validations worse than the previous one
        :param validation_metric: passed to ``_train_with_early_stopping``
        :param evaluator_object: evaluator used for validation, may be None
        :param validation_every_n: how often to validate; None is replaced by ``np.inf``
        :param old_similarity_matrix: if you want to start from a fixed similarity matrix
        :param force_compute_sim: when False a saved matrix is used if one exists
        :return: None
        :raises ValueError: if ``topK`` is neither False nor >= 1
        '''

        # Validate before touching the cache or building anything expensive.
        # `!=` (not `is not`) is kept on purpose: 0 compares equal to False
        # and was accepted before as well.
        if topK != False and topK < 1:
            raise ValueError(
                "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                .format(topK))

        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate
        self.topK = topK
        self.epochs = epochs

        cache_path = self._similarity_cache_path()

        if not force_compute_sim:
            try:
                # NOTE(review): pickle.load executes arbitrary code from the
                # file; only safe while this folder holds files written here.
                with open(cache_path, 'rb') as handle:
                    W_sparse_new = pickle.load(handle)
            except FileNotFoundError:
                pass
            else:
                self.W_sparse = W_sparse_new
                print("Saved SLIM Matrix Used!")
                return

        if evaluator_object is None and stop_on_validation:
            print("Creating evaluator object for SLIM BPR")
            evaluator_object = SequentialEvaluator(self.URM_validation,
                                                   self.URM_train)

        # Import compiled module
        from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        # The previous version also re-built a thresholded copy of URM_train
        # here but never used it: training runs on self.URM_mask.
        self.sgd_mode = sgd_mode
        self.batch_size = batch_size

        if validation_every_n is not None:
            self.validation_every_n = validation_every_n
        else:
            self.validation_every_n = np.inf

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            old_similarity=old_similarity_matrix,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,  # symmetric by default
            sgd_mode=sgd_mode,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        # Pass the normalised period: the raw argument may be None, which is
        # the very case the np.inf fallback above was written for.
        # NOTE(review): assumes _train_with_early_stopping only uses the
        # period arithmetically - confirm against its implementation.
        self._train_with_early_stopping(epochs,
                                        self.validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.RECOMMENDER_NAME)

        self.get_S_incremental_and_set_W()

        # Create the cache folder if needed, so a finished training is not
        # lost to a FileNotFoundError at save time.
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        with open(cache_path, 'wb') as handle:
            pickle.dump(self.W_sparse, handle, protocol=pickle.HIGHEST_PROTOCOL)

        sys.stdout.flush()

    def _initialize_incremental_model(self):
        """Read the current similarity from the Cython object and keep a copy as best model."""
        self.S_incremental = self.cythonEpoch.get_S()
        self.S_best = self.S_incremental.copy()

    def _update_incremental_model(self):
        """Refresh ``S_incremental`` and the recommender weights."""
        self.get_S_incremental_and_set_W()

    def _update_best_model(self):
        """Store a copy of the current similarity as the best model."""
        self.S_best = self.S_incremental.copy()

    def _run_epoch(self, num_epoch):
        """Run one training epoch in the Cython object (``num_epoch`` is unused)."""
        self.cythonEpoch.epochIteration_Cython()

    def get_S_incremental_and_set_W(self):
        """Fetch the similarity from the Cython object and expose it as ``W_sparse`` or ``W``."""
        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:
            self.W_sparse = self.S_incremental
        elif self.sparse_weights:
            self.W_sparse = similarityMatrixTopK(self.S_incremental, k=self.topK)
        else:
            self.W = self.S_incremental

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):
        """Print the current hyper-parameters and results, and append them to ``logFile`` if given."""
        current_config = {
            'lambda_i': self.lambda_i,
            'lambda_j': self.lambda_j,
            'batch_size': self.batch_size,
            'learn_rate': self.learning_rate,
            'topK_similarity': self.topK,
            'epoch': currentEpoch
        }

        print("Test case: {}\nResults {}\n".format(current_config, results_run))
        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(
                current_config, results_run))
            logFile.flush()

    def runCompilationScript(self):
        """Compile the Cython module inside its own subfolder of the working directory.

        :raises subprocess.CalledProcessError: if the build command fails
        """
        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root
        compiledModuleSubfolder = "/SLIM_BPR/Cython"
        fileToCompile_list = ['SLIM_BPR_Cython_Epoch.pyx']
        working_dir = os.getcwd() + compiledModuleSubfolder

        for fileToCompile in fileToCompile_list:
            # Command: python compileCython.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace
            command = [
                'python', 'compileCython.py', fileToCompile, 'build_ext',
                '--inplace'
            ]
            subprocess.check_output(' '.join(command), shell=True, cwd=working_dir)

            # The html annotation report (cython -a) is optional: a failure is
            # reported but must not abort the compilation.
            try:
                command = ['cython', fileToCompile, '-a']
                subprocess.check_output(' '.join(command), shell=True, cwd=working_dir)
            except (subprocess.CalledProcessError, OSError) as exc:
                print("Cython annotation report not generated: {}".format(exc))

        print("Compiled module saved in subfolder: {}".format(
            compiledModuleSubfolder))

    def normalized_SLIM(self, scale=0.55):
        """Rescale ``W_sparse`` so that its maximum value becomes ``scale``.

        :param scale: target maximum, 0.55 as in the original implementation
        """
        max_value = self.W_sparse.max()

        # A matrix whose maximum is zero cannot be rescaled; leave it as is
        # instead of dividing by zero.
        if max_value == 0:
            return

        self.W_sparse = self.W_sparse / max_value * scale
class SLIM_BPR_Cython(SLIM_BPR_Python):
    """Variant of SLIM_BPR_Python whose training epochs run in a compiled Cython module."""

    def __init__(self, URM_train, positive_threshold=4, recompile_cython=False,
                 sparse_weights=False, symmetric=True, sgd_mode='adagrad'):
        """Initialise the parent recommender and optionally recompile the Cython module.

        :param URM_train: user-rating matrix, handed to the parent constructor
        :param positive_threshold: handed to the parent constructor
        :param recompile_cython: when True the Cython module is recompiled
        :param sparse_weights: handed to the parent constructor
        :param symmetric: given to the Cython epoch object at fit time
        :param sgd_mode: stored in ``self.sgd_mode``
        """
        super(SLIM_BPR_Cython, self).__init__(URM_train,
                                              positive_threshold=positive_threshold,
                                              sparse_weights=sparse_weights)

        self.sgd_mode = sgd_mode
        self.symmetric = symmetric

        if not sparse_weights:
            n_items = URM_train.shape[1]
            # 8 bytes per cell of an n_items x n_items matrix, expressed in MB
            required_mb = 8 * n_items**2 / 1e+06

            if symmetric:
                required_mb /= 2

            print(
                "SLIM_BPR_Cython: Estimated memory required for similarity matrix of {} items is {:.2f} MB"
                .format(n_items, required_mb))

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self, epochs=30, logFile=None, URM_test=None, filterTopPop=False,
            minRatingsPerUser=1, batch_size=1000, validate_every_N_epochs=1,
            start_validation_after_N_epochs=0, lambda_i=0.0, lambda_j=0.0,
            learning_rate=0.01, topK=200, sgd_mode='adagrad'):
        """Build the Cython epoch object and delegate training to the parent class.

        :param batch_size: forwarded to ``fit_alreadyInitialized``; the Cython
            epoch object is always built with ``batch_size=1``
        :param lambda_i: given to the Cython object as ``li_reg`` and to the parent fit
        :param lambda_j: given to the Cython object as ``lj_reg`` and to the parent fit
        :param sgd_mode: stored in ``self.sgd_mode`` and given to the Cython object

        All remaining parameters are forwarded unchanged to
        ``fit_alreadyInitialized``; ``learning_rate`` and ``topK`` are also
        given to the Cython object.
        """

        # The previous version also built a thresholded copy of URM_train
        # here but never used it: training runs on self.URM_mask.
        self.sgd_mode = sgd_mode

        # Import compiled module
        from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,
            sgd_mode=sgd_mode)

        # Call super.fit to start training
        super(SLIM_BPR_Cython, self).fit_alreadyInitialized(
            epochs=epochs,
            logFile=logFile,
            URM_test=URM_test,
            filterTopPop=filterTopPop,
            minRatingsPerUser=minRatingsPerUser,
            batch_size=batch_size,
            validate_every_N_epochs=validate_every_N_epochs,
            start_validation_after_N_epochs=start_validation_after_N_epochs,
            lambda_i=lambda_i,
            lambda_j=lambda_j,
            learning_rate=learning_rate,
            topK=topK)

    def runCompilationScript(self):
        """Compile the Cython module inside its own subfolder of the working directory.

        :raises subprocess.CalledProcessError: if the build command fails
        """
        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root
        compiledModuleSubfolder = "/SLIM_BPR/Cython"
        fileToCompile_list = ['SLIM_BPR_Cython_Epoch.pyx']
        working_dir = os.getcwd() + compiledModuleSubfolder

        for fileToCompile in fileToCompile_list:
            # Command: python compileCython.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace
            command = [
                'python', 'compileCython.py', fileToCompile, 'build_ext',
                '--inplace'
            ]
            subprocess.check_output(' '.join(command), shell=True, cwd=working_dir)

            # The html annotation report (cython -a) is optional: a failure is
            # reported but must not abort the compilation.
            try:
                command = ['cython', fileToCompile, '-a']
                subprocess.check_output(' '.join(command), shell=True, cwd=working_dir)
            except (subprocess.CalledProcessError, OSError) as exc:
                print("Cython annotation report not generated: {}".format(exc))

        print("Compiled module saved in subfolder: {}".format(
            compiledModuleSubfolder))

    def updateSimilarityMatrix(self):
        """Fetch the similarity from the Cython object and expose it as ``W_sparse`` or ``W``."""
        self.S = self.cythonEpoch.get_S()

        if self.sparse_weights:
            self.W_sparse = self.S
        else:
            self.W = self.S

    def epochIteration(self):
        """Run one training epoch in the Cython object."""
        self.cythonEpoch.epochIteration_Cython()

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):
        """Print the current hyper-parameters and results, and append them to ``logFile`` if given."""
        current_config = {
            'learn_rate': self.learning_rate,
            'topK_similarity': self.topK,
            'epoch': currentEpoch,
            'sgd_mode': self.sgd_mode
        }

        print("Test case: {}\nResults {}\n".format(current_config, results_run))
        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(
                current_config, results_run))
            logFile.flush()
class SLIM_BPR_Cython(SimilarityMatrixRecommender, Recommender,
                      Incremental_Training_Early_Stopping):
    """SLIM item-item similarity recommender trained through a compiled Cython epoch.

    The heavy lifting is delegated to ``SLIM_BPR_Cython_Epoch`` (imported lazily
    in :meth:`fit`); this class prepares the interaction mask, drives training
    through ``_train_with_early_stopping`` and exposes scoring helpers built on
    ``self.W_sparse``.

    NOTE(review): ``get_weighted_score`` reads ``self.seq``, which is never
    assigned in this class -- presumably set by the caller; confirm.
    """

    RECOMMENDER_NAME = "SLIM_BPR_Recommender"

    def __init__(self, URM_train, positive_threshold=0.05, URM_validation=None,
                 recompile_cython=False, final_model_sparse_weights=True,
                 train_with_sparse_weights=False, symmetric=True):
        """Store the training data and build the positive-interaction mask.

        :param URM_train: sparse user-rating matrix; a copy is stored.
        :param positive_threshold: entries ``>=`` this value count as positive.
        :param URM_validation: optional validation matrix; a copy is stored.
        :param recompile_cython: when true, run :meth:`runCompilationScript`.
        :param final_model_sparse_weights: expose the final model as ``W_sparse``.
        :param train_with_sparse_weights: train on sparse weights; forces
            ``sparse_weights`` to ``True``.
        :param symmetric: forwarded to the Cython epoch object in :meth:`fit`.
        :raises AssertionError: if no interaction reaches ``positive_threshold``.
        """
        super(SLIM_BPR_Cython, self).__init__()

        self.URM_train = URM_train.copy()
        self.n_users = URM_train.shape[0]
        self.n_items = URM_train.shape[1]
        self.normalize = False
        self.positive_threshold = positive_threshold

        self.train_with_sparse_weights = train_with_sparse_weights
        self.sparse_weights = final_model_sparse_weights

        if URM_validation is not None:
            self.URM_validation = URM_validation.copy()
        else:
            self.URM_validation = None

        # Training on sparse weights necessarily produces a sparse final model.
        if self.train_with_sparse_weights:
            self.sparse_weights = True

        # Boolean mask of the interactions considered positive.
        self.URM_mask = self.URM_train.copy()
        self.URM_mask.data = self.URM_mask.data >= self.positive_threshold
        self.URM_mask.eliminate_zeros()

        assert self.URM_mask.nnz > 0, \
            "SLIM_BPR_Cython: URM_train_positive is empty, positive threshold is too high"

        self.symmetric = symmetric

        if not self.train_with_sparse_weights:
            # 8 bytes per entry of a dense n_items x n_items matrix, in MB.
            required_mb = 8 * self.n_items**2 / 1e+06
            if symmetric:
                required_mb /= 2
            print(
                "SLIM_BPR_Cython: Estimated memory required for similarity matrix of {} items is {:.2f} MB"
                .format(self.n_items, required_mb))

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self, epochs=75, logFile=None, batch_size=1000, lambda_i=0.001,
            lambda_j=0.001, learning_rate=0.02, topK=200, sgd_mode='adagrad',
            gamma=0.995, beta_1=0.9, beta_2=0.999, stop_on_validation=False,
            lower_validatons_allowed=5, validation_metric="MAP",
            evaluator_object=None, validation_every_n=1):
        """Load the cached similarity matrix, or train one and cache it.

        The cache file ``./Dataset/Slim_all.npz`` is loaded whenever it exists,
        regardless of the hyper-parameters passed here; delete it to force a
        retrain. On a cache hit none of the hyper-parameter attributes
        (``topK``, ``learning_rate``, ...) are set on the instance.

        ``logFile`` is accepted for interface compatibility and is not used.

        :raises ValueError: if training is needed and ``topK`` is neither
            ``False`` nor a positive integer.
        """
        print("Loading slim model...")
        dataSubfolder = "./Dataset/"
        modelName = "Slim_all.npz"
        model_path = dataSubfolder + modelName

        try:
            self.W_sparse = sps.load_npz(model_path)
            cache_hit = True
        except FileNotFoundError:
            cache_hit = False

        if not cache_hit:
            # Fail fast, before importing/allocating the Cython trainer.
            # `0 == False` in Python, so topK=0 is treated like False here.
            if (topK != False and topK < 1):
                raise ValueError(
                    "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                    .format(topK))

            # Import compiled module
            from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

            self.sgd_mode = sgd_mode
            self.epochs = epochs

            self.cythonEpoch = SLIM_BPR_Cython_Epoch(
                self.URM_mask,
                train_with_sparse_weights=self.train_with_sparse_weights,
                final_model_sparse_weights=self.sparse_weights,
                topK=topK,
                learning_rate=learning_rate,
                li_reg=lambda_i,
                lj_reg=lambda_j,
                batch_size=1,
                symmetric=self.symmetric,
                sgd_mode=sgd_mode,
                gamma=gamma,
                beta_1=beta_1,
                beta_2=beta_2)

            self.topK = topK

            if validation_every_n is not None:
                self.validation_every_n = validation_every_n
            else:
                self.validation_every_n = np.inf

            if evaluator_object is None and stop_on_validation:
                evaluator_object = SequentialEvaluator(self.URM_validation, [5])

            self.batch_size = batch_size
            self.lambda_i = lambda_i
            self.lambda_j = lambda_j
            self.learning_rate = learning_rate

            self._train_with_early_stopping(
                epochs, validation_every_n, stop_on_validation,
                validation_metric, lower_validatons_allowed, evaluator_object,
                algorithm_name=self.RECOMMENDER_NAME)

            self.get_S_incremental_and_set_W()

            sys.stdout.flush()

            sps.save_npz(model_path, self.W_sparse)

        print("Loading completed for slim")

    def _initialize_incremental_model(self):
        """Snapshot the current similarity matrix as both incremental and best model."""
        self.S_incremental = self.cythonEpoch.get_S()
        self.S_best = self.S_incremental.copy()

    def _update_incremental_model(self):
        """Refresh ``S_incremental`` and the exposed weights from the Cython object."""
        self.get_S_incremental_and_set_W()

    def _update_best_model(self):
        """Record the current incremental model as the best one seen so far."""
        self.S_best = self.S_incremental.copy()

    def _run_epoch(self, num_epoch):
        """Run one training epoch in Cython; ``num_epoch`` is not used."""
        self.cythonEpoch.epochIteration_Cython()

    def get_S_incremental_and_set_W(self):
        """Fetch the similarity matrix from Cython and expose it as ``W_sparse`` or ``W``."""
        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:
            self.W_sparse = self.S_incremental
        elif self.sparse_weights:
            self.W_sparse = similarityMatrixTopK(self.S_incremental, k=self.topK)
        else:
            self.W = self.S_incremental

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):
        """Print the current hyper-parameters and results, mirroring them to ``logFile`` if given."""
        current_config = {
            'lambda_i': self.lambda_i,
            'lambda_j': self.lambda_j,
            'batch_size': self.batch_size,
            'learn_rate': self.learning_rate,
            'topK_similarity': self.topK,
            'epoch': currentEpoch
        }

        print("Test case: {}\nResults {}\n".format(current_config, results_run))
        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(
                current_config, results_run))
            logFile.flush()

    def runCompilationScript(self):
        """Compile the Cython epoch module inside its own subfolder.

        The working directory is set to the subfolder so that the compiled
        files end up there and not in the project root. Equivalent commands::

            python compileCython.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace
            cython SLIM_BPR_Cython_Epoch.pyx -a

        :raises subprocess.CalledProcessError: if the compilation step fails.
        """
        compiledModuleSubfolder = "/SLIM_BPR/Cython"
        build_dir = os.getcwd() + compiledModuleSubfolder
        fileToCompile_list = ['SLIM_BPR_Cython_Epoch.pyx']

        for fileToCompile in fileToCompile_list:
            command = [
                'python', 'compileCython.py', fileToCompile, 'build_ext',
                '--inplace'
            ]
            subprocess.check_output(' '.join(command), shell=True, cwd=build_dir)

            # The annotated HTML report is optional: a failure here must not
            # abort the build, but Ctrl-C and programming errors still propagate.
            try:
                command = ['cython', fileToCompile, '-a']
                subprocess.check_output(' '.join(command), shell=True, cwd=build_dir)
            except (subprocess.CalledProcessError, OSError) as exc:
                print("Cython HTML report not generated for {}: {}".format(
                    fileToCompile, exc))

        print("Compiled module saved in subfolder: {}".format(
            compiledModuleSubfolder))

    @staticmethod
    def _normalize_scores(scores):
        """Scale ``scores`` by their maximum when that maximum is positive.

        With a zero maximum the division would yield NaN/inf, and with a
        negative one it would invert the ordering, so in those cases (and for
        an empty array) the scores are returned unscaled.
        """
        if scores.dtype.kind != 'f':
            scores = scores.astype(np.float64)
        if scores.size == 0:
            return scores
        maximum = np.amax(scores)
        if maximum > 0:
            return np.true_divide(scores, maximum)
        return scores

    def recommend(self, playlist_id, at=None, exclude_seen=True):
        """Return item indices ranked by decreasing score, truncated to ``at``.

        :param playlist_id: row index into ``URM_train``.
        :param at: number of items to return; ``None`` returns the full ranking.
        :param exclude_seen: push items already in the profile to the bottom.
        """
        # compute the scores using the dot product
        playlist_profile = self.URM_train[playlist_id]
        scores = playlist_profile.dot(self.W_sparse).toarray().ravel()

        if exclude_seen:
            scores = self.filter_seen(playlist_id, self._normalize_scores(scores))

        # rank items
        ranking = scores.argsort()[::-1]
        return ranking[:at]

    def get_scores(self, playlist_id):
        """Return the max-normalised item scores for ``playlist_id``."""
        playlist_profile = self.URM_train[playlist_id]
        scores = playlist_profile.dot(self.W_sparse).toarray().ravel()
        return self._normalize_scores(scores)

    def get_weighted_score(self, playlist_id):
        """Return normalised scores, re-weighting the profile for ids in ``self.seq``.

        For those ids the first ``n`` stored profile values are replaced by a
        linear ramp rising from ``1 - diff + diff / n`` up to ``1``. Empty
        profiles are left untouched, which avoids a division by zero.
        """
        profile = self.URM_train[playlist_id]
        n = profile.count_nonzero()
        # np.float was removed in NumPy 1.24; it was an alias of the builtin float.
        float_profile = profile.astype(np.float64)

        if n > 0 and playlist_id in self.seq:
            # tune this
            diff = 0.15
            slope = diff / n
            float_profile.data[:n] = (1 - diff) + slope * np.arange(1, n + 1)

        scores = float_profile.dot(self.W_sparse).toarray().ravel()
        return self._normalize_scores(scores)

    def filter_seen(self, playlist_id, scores):
        """Set the scores of items already in the profile to ``-inf`` (in place) and return them."""
        start_pos = self.URM_train.indptr[playlist_id]
        end_pos = self.URM_train.indptr[playlist_id + 1]

        playlist_profile = self.URM_train.indices[start_pos:end_pos]
        scores[playlist_profile] = -np.inf

        return scores
class SLIM_BPR_Cython(SLIM_BPR_Python):
    """SLIM BPR recommender whose epochs run in a compiled Cython module.

    NOTE(review): ``URM_train``, ``URM_mask``, ``n_users``, ``sparse_weights``
    and ``fit_alreadyInitialized`` are not defined here -- presumably provided
    by ``SLIM_BPR_Python``; confirm against the parent class.
    """

    def __init__(self, URM_train, positive_threshold=3, recompile_cython=False,
                 sparse_weights=False, sgd_mode='adagrad'):
        """Initialise the parent recommender and optionally rebuild the Cython module.

        :param URM_train: training matrix, forwarded to the parent constructor.
        :param positive_threshold: forwarded to the parent constructor.
        :param recompile_cython: when true, run :meth:`runCompilationScript`.
        :param sparse_weights: forwarded to the parent constructor.
        :param sgd_mode: optimiser name stored on the instance.
        """
        super(SLIM_BPR_Cython, self).__init__(
            URM_train,
            positive_threshold=positive_threshold,
            sparse_weights=sparse_weights)

        self.sgd_mode = sgd_mode

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self, epochs=30, logFile=None, URM_test=None, minRatingsPerUser=1,
            batch_size=1000, validate_every_N_epochs=1,
            start_validation_after_N_epochs=0, lambda_i=0.0025,
            lambda_j=0.00025, learning_rate=0.05, topK=False,
            sgd_mode='adagrad'):
        """Build the Cython epoch object and delegate training to the parent.

        Users with at least one interaction ``>= positive_threshold`` are
        collected in ``self.eligibleUsers`` and handed to the Cython object;
        every other argument is forwarded to ``fit_alreadyInitialized``.
        """
        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()
        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
        URM_train_positive.eliminate_zeros()

        # A user is eligible when their row holds at least one positive entry;
        # consecutive indptr differences give the per-row entry counts.
        positives_per_user = np.diff(URM_train_positive.indptr[:self.n_users + 1])
        self.eligibleUsers = np.flatnonzero(positives_per_user > 0).astype(np.int64)

        self.sgd_mode = sgd_mode

        # Import compiled module
        from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(self.URM_mask,
                                                 self.sparse_weights,
                                                 self.eligibleUsers,
                                                 topK=topK,
                                                 learning_rate=learning_rate,
                                                 batch_size=1,
                                                 sgd_mode=sgd_mode)

        # Call the parent's training loop to start training
        super(SLIM_BPR_Cython, self).fit_alreadyInitialized(
            epochs=epochs,
            logFile=logFile,
            URM_test=URM_test,
            minRatingsPerUser=minRatingsPerUser,
            batch_size=batch_size,
            validate_every_N_epochs=validate_every_N_epochs,
            start_validation_after_N_epochs=start_validation_after_N_epochs,
            lambda_i=lambda_i,
            lambda_j=lambda_j,
            learning_rate=learning_rate,
            topK=topK)

    def runCompilationScript(self):
        """Compile the Cython epoch module inside its own subfolder.

        The working directory is set to the subfolder so that the compiled
        files end up there and not in the project root. Equivalent commands::

            python3 compileCython.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace
            cython SLIM_BPR_Cython_Epoch.pyx -a

        :raises subprocess.CalledProcessError: if the compilation step fails.
        """
        compiledModuleSubfolder = "/SLIM_BPR/Cython"
        build_dir = os.getcwd() + compiledModuleSubfolder
        fileToCompile_list = ['SLIM_BPR_Cython_Epoch.pyx']

        for fileToCompile in fileToCompile_list:
            command = [
                'python3', 'compileCython.py', fileToCompile, 'build_ext',
                '--inplace'
            ]
            subprocess.check_output(' '.join(command), shell=True, cwd=build_dir)

            # The annotated HTML report is optional: a failure here must not
            # abort the build, but Ctrl-C and programming errors still propagate.
            try:
                command = ['cython', fileToCompile, '-a']
                subprocess.check_output(' '.join(command), shell=True, cwd=build_dir)
            except (subprocess.CalledProcessError, OSError) as exc:
                print("Cython HTML report not generated for {}: {}".format(
                    fileToCompile, exc))

        print("Compiled module saved in subfolder: {}".format(
            compiledModuleSubfolder))

    def epochIteration(self):
        """Run one Cython epoch and publish its result as ``W_sparse`` or ``W``."""
        self.S = self.cythonEpoch.epochIteration_Cython()

        if self.sparse_weights:
            self.W_sparse = self.S
        else:
            self.W = self.S

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):
        """Print the current configuration and results, mirroring them to ``logFile`` if given."""
        current_config = {
            'learn_rate': self.learning_rate,
            'topK_similarity': self.topK,
            'epoch': currentEpoch,
            'sgd_mode': self.sgd_mode
        }

        print("Test case: {}\nResults {}\n".format(current_config, results_run))
        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(
                current_config, results_run))
            logFile.flush()

    def get_similarity(self):
        """Return the sparse similarity matrix ``W_sparse``."""
        return self.W_sparse