def fit(self, topK=600, shrink=1000, similarity='asymmetric', normalize=True,
        feature_weighting="BM25", save_model=False, best_parameters=False,
        **similarity_args):
    """Fit the item-item similarity model computed from ``self.ICM``.

    Args:
        topK: Neighbourhood size forwarded to ``Compute_Similarity``.
        shrink: Shrink term forwarded to ``Compute_Similarity``.
        similarity: Name of the similarity measure forwarded to
            ``Compute_Similarity`` and recorded in ``self.parameters``.
        normalize: Normalisation flag forwarded to ``Compute_Similarity``.
        feature_weighting: Accepted for signature compatibility; this
            method does not read it (the weighting applied in
            ``best_parameters`` mode comes from ``self.feature_weighting``).
        save_model: When true, persist the fitted model via
            ``self.saveModel``.
        best_parameters: When true, first load stored parameters through
            ``OfflineDataLoader.get_parameter`` / ``self.loadModel`` and
            re-weight ``self.ICM`` according to ``self.feature_weighting``.
        **similarity_args: Currently ignored; see the note in the body.

    Returns:
        None. Sets ``self.parameters`` and either ``self.W_sparse`` or
        ``self.W``; in ``best_parameters`` mode ``self.ICM`` is replaced by
        its weighted version.
    """
    # NOTE: the tuned alpha always replaces whatever the caller passed in
    # ``**similarity_args``; kept as-is so existing results do not change.
    similarity_args = {'asymmetric_alpha': 0.40273209903969387}

    if best_parameters:
        loader = OfflineDataLoader()
        folder_path_icbf, file_name_icbf = loader.get_parameter(
            self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path_icbf, file_name=file_name_icbf)
        # self.feature_weighting is presumably restored by loadModel;
        # any value other than the two below leaves the ICM untouched.
        if self.feature_weighting == "BM25":
            self.ICM = to_okapi(self.ICM.astype(np.float32))
        elif self.feature_weighting == "TF-IDF":
            self.ICM = to_tfidf(self.ICM.astype(np.float32))
    else:
        self.topK = topK
        self.shrink = shrink

    # Use a dedicated name for the computer object so the ``similarity``
    # argument keeps holding the measure's name for the log string below.
    similarity_computer = Compute_Similarity(
        self.ICM.T, shrink=shrink, topK=topK, normalize=normalize,
        similarity=similarity, **similarity_args)

    self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                      "normalize= {4}".format(
                          self.sparse_weights, similarity, self.shrink,
                          self.topK, normalize)

    if self.sparse_weights:
        self.W_sparse = similarity_computer.compute_similarity()
    else:
        self.W = similarity_computer.compute_similarity()
        self.W = self.W.toarray()

    if save_model:
        self.saveModel("saved_models/submission/",
                       file_name="ItemKNNCBFRecommender_submission_model")
def fit(self, topK=250, shrink=100, alpha=0.7017094, beta=0.51034483,
        gamma=0.16206897, normalize=False, similarity="jaccard",
        **similarity_args):
    """Prepare the similarity models combined by the item tree hybrid.

    Stores the hyper-parameters on the instance, builds three
    ``Compute_Similarity`` objects plus one ``Slim_mark1`` model and, when
    ``self.sparse_weights`` is truthy, computes their similarity matrices.

    Args:
        topK: Stored as ``self.k`` and logged as the neighbourhood size.
            The individual similarity objects use their own fixed ``topK``.
        shrink: Stored as ``self.shrink`` and logged. The individual
            similarity objects use their own fixed ``shrink``.
        alpha, beta, gamma: Stored on the instance and logged; not
            otherwise used by this method.
        normalize: Forwarded to every ``Compute_Similarity`` call.
        similarity: Similarity name forwarded to every
            ``Compute_Similarity`` call.
        **similarity_args: Extra keyword arguments forwarded unchanged.

    Returns:
        None.
    """
    self.k, self.shrink = topK, shrink
    self.alpha, self.beta, self.gamma = alpha, beta, gamma
    self.normalize = normalize

    print("Item Tree Hybrid Recommender: Model fitting begins")

    # Build the similarity objects one at a time. The sizes quoted below
    # are the original author's notes, not something checked here.

    # Weighted URM, transposed (author's note: 50446 x 50446).
    self.sim_URM_tfidf = Compute_Similarity(
        self.URM_train_tfidf.T,
        shrink=0,
        topK=200,
        normalize=normalize,
        similarity=similarity,
        **similarity_args)

    # ICM re-weighted with to_okapi (author's note: 20635 x 20635).
    # self.ICM itself is overwritten with the weighted matrix.
    self.ICM = to_okapi(self.ICM)
    self.sim_ICM_tfidf = Compute_Similarity(
        self.ICM.T,
        shrink=0,
        topK=25,
        normalize=normalize,
        similarity=similarity,
        **similarity_args)

    # Weighted URM, not transposed (author's note: 20635 x 20635).
    self.sim_URM_T_tfidf = Compute_Similarity(
        self.URM_train_tfidf,
        shrink=10,
        topK=350,
        normalize=normalize,
        similarity=similarity,
        **similarity_args)

    # SLIM model on the raw URM (author's note: 20635 x 20635).
    self.sim_Slim = Slim_mark1(self.URM_train)

    if self.sparse_weights:
        self.W_sparse_URM = self.sim_URM_tfidf.compute_similarity()
        self.W_sparse_ICM = self.sim_ICM_tfidf.compute_similarity()
        # A UCM-based similarity (self.sim_UCM_tfidf) was disabled by the
        # original author and is not computed.
        self.W_sparse_URM_T = self.sim_URM_T_tfidf.compute_similarity()
        # Author's note for SLIM: lambda_i = 0.37142857, lambda_j = 0.97857143
        self.W_sparse_Slim = self.sim_Slim.fit()

    # Human-readable parameter summary used for logging.
    logged_values = (self.sparse_weights, similarity, shrink, topK,
                     normalize, alpha, beta, gamma)
    self.parameters = "sparse_weights= {}, similarity= {},shrink= {}, neighbourhood={},normalize= {}, alpha= {}, beta={}, gamma={}".format(
        *logged_values)
def fit(self, topK=175, shrink=400, similarity="asymmetric", normalize=True,
        feature_weighting="BM25", save_model=False, best_parameters=False,
        location="training", submission=False, offline=False,
        **similarity_args):
    """Fit (or load) the similarity model computed from ``self.URM_train.T``.

    Args:
        topK: Neighbourhood size (used when ``best_parameters`` is false).
        shrink: Shrink term (used when ``best_parameters`` is false).
        similarity: Name of the similarity measure; forwarded to
            ``Compute_Similarity`` when ``best_parameters`` is false and
            recorded in ``self.parameters``.
        normalize: Normalisation flag (used when ``best_parameters`` is
            false).
        feature_weighting: ``"BM25"``, ``"TF-IDF"`` or ``"none"``; selects
            the weighting applied to the URM before computing similarity.
        save_model: When true, persist the fitted model via
            ``self.saveModel``.
        best_parameters: When true, load stored parameters and use the
            hard-coded tuned arguments instead of the call arguments.
        location: Suffix used in the saved model's file name.
        submission: In offline mode, selects the non-training model
            (``training=not submission``).
        offline: When true, only load a previously saved model and return.
        **similarity_args: Extra arguments for ``Compute_Similarity``. Only
            honoured when ``best_parameters`` is true and
            ``self.feature_weighting == "none"``; replaced otherwise.

    Returns:
        None. Unless ``offline`` is true, sets ``self.parameters`` and
        either ``self.W_sparse`` or ``self.W``.
    """
    if offline:
        # Nothing to compute: restore a stored model and stop.
        loader = OfflineDataLoader()
        folder_path_icf, file_name_icf = loader.get_model(
            self.RECOMMENDER_NAME, training=(not submission))
        self.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)
        return

    if best_parameters:
        loader = OfflineDataLoader()
        folder_path_ucf, file_name_ucf = loader.get_parameter(
            self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path_ucf, file_name=file_name_ucf)
        if self.feature_weighting == "none":
            similarity_computer = Compute_Similarity(
                self.URM_train.T, **similarity_args)
        else:
            # The weighting is chosen from the ``feature_weighting``
            # argument here, not from the loaded attribute. Feed the
            # float32 cast into the weighting function (the cast used to
            # be thrown away).
            if feature_weighting == "BM25":
                self.URM_train_copy = to_okapi(
                    self.URM_train.astype(np.float32))
            elif feature_weighting == "TF-IDF":
                self.URM_train_copy = to_tfidf(
                    self.URM_train.astype(np.float32))
            similarity_args = {
                'asymmetric_alpha': 0.11483114799990246,
                'normalize': True,
                'shrink': 450,
                'similarity': 'asymmetric',
                'topK': 200
            }
            similarity_computer = Compute_Similarity(
                self.URM_train_copy.T, **similarity_args)
    else:
        self.topK = topK
        self.shrink = shrink
        self.feature_weighting = feature_weighting
        # The tuned alpha replaces any caller-supplied similarity_args.
        similarity_args = {'asymmetric_alpha': 0.0033404951135529437}

        if self.feature_weighting == "BM25":
            self.URM_train_copy = to_okapi(self.URM_train.astype(np.float32))
        elif self.feature_weighting == "TF-IDF":
            self.URM_train_copy = to_tfidf(self.URM_train.astype(np.float32))

        if self.feature_weighting == "none":
            source_matrix = self.URM_train
        else:
            source_matrix = self.URM_train_copy
        similarity_computer = Compute_Similarity(
            source_matrix.T, shrink=shrink, topK=topK, normalize=normalize,
            similarity=similarity, **similarity_args)

    # ``similarity`` still holds the measure's name here, so the summary
    # logs that name rather than the repr of the computer object. In
    # best_parameters mode the values logged are the call arguments, not
    # the hard-coded tuned ones.
    self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                      "normalize= {4}".format(
                          self.sparse_weights, similarity, shrink, topK,
                          normalize)

    if self.sparse_weights:
        self.W_sparse = similarity_computer.compute_similarity()
    else:
        self.W = similarity_computer.compute_similarity()
        self.W = self.W.toarray()

    if save_model:
        # NOTE(review): the folder is always "submission" even though the
        # file name carries ``location`` — confirm this is intended.
        self.saveModel("saved_models/submission/",
                       file_name=self.RECOMMENDER_NAME + "_" + location + "_model")
def get_URM_train_okapi(self):
    """Return ``to_okapi(self.URM_train)``, also stored on the instance.

    The result is recomputed on every call and kept in
    ``self.URM_train_okapi``.

    Raises:
        TypeError: If ``self.URM_train`` is ``None``.
    """
    if self.URM_train is None:
        raise TypeError("PlaylistDataReader: URM train is not build")

    self.URM_train_okapi = to_okapi(self.URM_train)
    return self.URM_train_okapi
def fit(self, topK=400, shrink=200, similarity='cosine',
        feature_weighting="BM25", normalize=True, save_model=False,
        best_parameters=False, offline=False, submission=False,
        location="submission", **similarity_args):
    """Fit (or load) the similarity model computed from ``self.URM_train``.

    Args:
        topK: Neighbourhood size (used when ``best_parameters`` is false).
        shrink: Shrink term (used when ``best_parameters`` is false).
        similarity: Name of the similarity measure; forwarded to
            ``Compute_Similarity`` when ``best_parameters`` is false and
            recorded in ``self.parameters``.
        feature_weighting: ``"BM25"``, ``"TF-IDF"`` or ``"none"``; stored
            on the instance and applied when ``best_parameters`` is false.
        normalize: Normalisation flag (used when ``best_parameters`` is
            false).
        save_model: When true, persist the fitted model via
            ``self.saveModel``.
        best_parameters: When true, load stored parameters and use the
            hard-coded tuned arguments; the weighting then follows
            ``self.feature_weighting``.
        offline: When true, only load a previously saved model and return.
        submission: In offline mode, selects the non-training model
            (``training=not submission``).
        location: Sub-folder and file-name suffix of the saved model.
        **similarity_args: Extra arguments forwarded to
            ``Compute_Similarity`` when ``best_parameters`` is false.

    Returns:
        None. Unless ``offline`` is true, sets ``self.parameters`` and
        either ``self.W_sparse`` or ``self.W``.
    """
    # Alternative tuned settings left by the original author (disabled):
    #   {'tversky_alpha': 0.8047100184165605, 'tversky_beta': 1.9775806370926445}
    if offline:
        # Nothing to compute: restore a stored model and stop.
        loader = OfflineDataLoader()
        folder_path_icf, file_name_icf = loader.get_model(
            self.RECOMMENDER_NAME, training=(not submission))
        self.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)
        return

    if best_parameters:
        loader = OfflineDataLoader()
        folder_path_icf, file_name_icf = loader.get_parameter(
            self.RECOMMENDER_NAME)
        self.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)
        # Alternative tuned settings left by the original author (disabled):
        #   {'normalize': True, 'shrink': 0, 'similarity': 'tversky', 'topK': 20,
        #    'tversky_alpha': 0.18872151621891953, 'tversky_beta': 1.99102432161935}
        similarity_args = {
            'feature_weighting': 'BM25',
            'normalize': True,
            'shrink': 200,
            'similarity': 'cosine',
            'topK': 400
        }
        extra_kwargs = {}
    else:
        self.topK = topK
        self.shrink = shrink
        self.feature_weighting = feature_weighting
        extra_kwargs = dict(shrink=shrink, topK=topK, normalize=normalize,
                            similarity=similarity)

    # Feed the float32 cast into the weighting function (the cast used to
    # be thrown away because the raw URM was weighted instead).
    if self.feature_weighting == "BM25":
        self.URM_train_copy = to_okapi(self.URM_train.astype(np.float32))
    elif self.feature_weighting == "TF-IDF":
        self.URM_train_copy = to_tfidf(self.URM_train.astype(np.float32))

    # With no weighting, use the raw URM in both modes. The best_parameters
    # path used to read self.URM_train_copy even though this call never
    # assigned it for "none".
    if self.feature_weighting == "none":
        source_matrix = self.URM_train
    else:
        source_matrix = self.URM_train_copy

    similarity_computer = Compute_Similarity(
        source_matrix, **extra_kwargs, **similarity_args)

    # ``similarity`` still holds the measure's name here, so the summary
    # logs that name rather than the repr of the computer object. In
    # best_parameters mode the values logged are the call arguments, not
    # the hard-coded tuned ones.
    self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, normalize={4}".format(
        self.sparse_weights, similarity, shrink, topK, normalize)

    if self.sparse_weights:
        self.W_sparse = similarity_computer.compute_similarity()
    else:
        self.W = similarity_computer.compute_similarity()
        self.W = self.W.toarray()

    if save_model:
        self.saveModel("saved_models/" + location + "/",
                       file_name=self.RECOMMENDER_NAME + "_" + location + "_model")