def fit(self, trainset):
    """Train an RBM on one-hot encoded ratings and cache dense predictions."""
    AlgoBase.fit(self, trainset)

    userCount = trainset.n_users
    itemCount = trainset.n_items

    # One-hot encode each rating into 10 half-star buckets: rating r -> index 2r-1.
    ratingTensor = np.zeros([userCount, itemCount, 10], dtype=np.float32)
    for (uid, iid, rating) in trainset.all_ratings():
        bucket = int(float(rating) * 2.0) - 1
        ratingTensor[int(uid), int(iid), bucket] = 1

    # Flatten to 2D: one row per user, one visible node per (item, bucket) pair.
    ratingTensor = np.reshape(ratingTensor, [ratingTensor.shape[0], -1])

    # One visible node per (item, rating value) combination.
    rbm = RBM(ratingTensor.shape[1], hiddenDimensions=self.hiddenDim,
              learningRate=self.learningRate, batchSize=self.batchSize,
              epochs=self.epochs)
    rbm.Train(ratingTensor)

    self.predictedRatings = np.zeros([userCount, itemCount], dtype=np.float32)
    for innerUserID in range(trainset.n_users):
        if innerUserID % 50 == 0:
            print("Processing user ", innerUserID)
        userRecs = rbm.GetRecommendations([ratingTensor[innerUserID]])
        userRecs = np.reshape(userRecs, [itemCount, 10])
        for itemID, bucketScores in enumerate(userRecs):
            # Plain argmax produces a huge multi-way tie at 5 stars; instead
            # softmax-normalize the 10 bucket scores and take the expectation,
            # as suggested by the RBM-for-CF paper.
            probabilities = self.softmax(bucketScores)
            expectedBucket = np.average(np.arange(10), weights=probabilities)
            # Map the bucket index back onto the half-star rating scale.
            self.predictedRatings[innerUserID, itemID] = (expectedBucket + 1) * 0.5
    return self
def train(self, trainset):
    """Legacy surprise ``train`` entry point: fit baselines and similarities."""
    AlgoBase.train(self, trainset)
    # Per-user and per-item baseline biases, then the similarity matrix.
    self.bu, self.bi = self.compute_baselines()
    self.sim = self.compute_similarities()
def fit(self, trainset):
    """Build the content-based item-item similarity matrix (genre x year).

    Fix: the original also loaded mise-en-scene data and computed a
    mise-en-scene similarity for every pair, but never used either result;
    that dead work (roughly tripling the O(n^2) inner cost) is removed.
    The stored similarities are unchanged.
    """
    AlgoBase.fit(self, trainset)

    # Load content attributes for every movie.
    ml = MovieLens()
    genres = ml.getGenres()
    years = ml.getYears()

    print("Computing content-based similarity matrix...")

    # Pairwise similarity over all items; symmetric, so only the upper
    # triangle is computed and then mirrored.
    self.similarities = np.zeros((self.trainset.n_items, self.trainset.n_items))

    for thisRating in range(self.trainset.n_items):
        if (thisRating % 100 == 0):
            print(thisRating, " of ", self.trainset.n_items)
        for otherRating in range(thisRating + 1, self.trainset.n_items):
            thisMovieID = int(self.trainset.to_raw_iid(thisRating))
            otherMovieID = int(self.trainset.to_raw_iid(otherRating))
            genreSimilarity = self.computeGenreSimilarity(thisMovieID, otherMovieID, genres)
            yearSimilarity = self.computeYearSimilarity(thisMovieID, otherMovieID, years)
            self.similarities[thisRating, otherRating] = genreSimilarity * yearSimilarity
            self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]

    print("...done.")
    return self
def fit(self, trainset):
    # Alternating gradient-descent matrix factorization (via autograd) with an
    # extra penalty pushing mean predicted per-category exposure toward
    # self.cat_target.
    AlgoBase.fit(self, trainset)
    n = trainset.n_users
    m = trainset.n_items
    #print(n,m)
    # K: 0/1 mask of observed (user, item) pairs; R: the rating values.
    self.K = agnp.zeros((n,m))
    self.R = agnp.zeros((n,m))
    for u, i, rating in trainset.all_ratings():
        ru, ri = self.add_to_known(u,i)
        self.K[ru,ri]=1
        self.R[ru,ri]=rating
    # Random init of user factors U (n x k) and item factors M (k x m).
    self.U = agnp.random.normal(size = (n, self.latent_dimension))
    self.M = agnp.random.normal(size = (self.latent_dimension, m))
    # C[ri, c] is True when item ri belongs to category c (one-hot membership
    # over the categories listed in cat_target).
    self.C = agnp.array([[self.cat_products[self.from_known_ri(ri)] == c for c in range(len(self.cat_target))] for ri in range(m)])
    # Loss as a function of U with M fixed: masked squared reconstruction error
    # + mu * L2 on both factor matrices
    # + lamb * squared gap between mean category exposure and cat_target.
    self.fun_U = lambda U : (agnp.sum(self.K*(self.R - agnp.dot(U,self.M))**2)+ self.mu * (agnp.sum(U**2) + agnp.sum(self.M**2)) +self.lamb*agnp.sum((1/n * agnp.dot(agnp.dot(agnp.ones(n),agnp.dot(U, self.M)),self.C) -self.cat_target)**2))
    # Same loss viewed as a function of M with U fixed.
    self.fun_M = lambda M : (agnp.sum(self.K*(self.R - agnp.dot(self.U,M))**2)+ self.mu * (agnp.sum(self.U**2) + agnp.sum(M**2)) +self.lamb*agnp.sum((1/n * agnp.dot(agnp.dot(agnp.ones(n),agnp.dot(self.U, M)),self.C) -self.cat_target)**2))
    self.grad_U = grad(self.fun_U)
    self.grad_M = grad(self.fun_M)
    # Alternate single-step updates of M then U, decaying the learning rate
    # by alpha after each outer epoch.
    for epoch in range(self.nb_main_epochs):
        self.M = gradient_descent(self.M, self.grad_M, N = 1, lr = self.lr, alpha = 1)
        self.U = gradient_descent(self.U, self.grad_U, N = 1, lr = self.lr, alpha = 1)
        self.lr*=self.alpha
    return self
def __init__(self):
    """Create an untrained instance; all model state is populated by fit()."""
    AlgoBase.__init__(self)
    # Lazily-populated model state: per-user rating lookups and means,
    # deviation statistics and counts, and the SVD-smoothed deviations.
    for attribute in ("rating_lookup_by_user", "u_mean", "dev", "cnts", "dev_svd"):
        setattr(self, attribute, None)
def fit(self, trainset):
    """Precompute a content-based item-item similarity matrix from genres and years."""
    AlgoBase.fit(self, trainset)

    ml = MovieLens()
    genres = ml.getGenres()
    years = ml.getYears()

    print("Computing content-based similarity matrix")

    # Square item x item matrix; symmetric, so only the upper triangle is
    # computed and mirrored into the lower triangle.
    itemCount = self.trainset.n_items
    self.similarities = np.zeros((itemCount, itemCount))
    for rowIndex in range(itemCount):
        if rowIndex % 1000 == 0:
            print(rowIndex, " of ", itemCount)
        for colIndex in range(rowIndex + 1, itemCount):
            movieA = int(self.trainset.to_raw_iid(rowIndex))
            movieB = int(self.trainset.to_raw_iid(colIndex))
            genreScore = self.similarityBasedOnGenre(movieA, movieB, genres)
            yearScore = self.similarityBasedOnYear(movieA, movieB, years)
            combined = genreScore * yearScore
            self.similarities[rowIndex, colIndex] = combined
            self.similarities[colIndex, rowIndex] = combined
    return self
def fit(self, trainset):
    """Build an item-item similarity matrix from genre vectors only."""
    AlgoBase.fit(self, trainset)

    # Load genre vectors for every movie.
    movies = MoviesContent(False, False)
    genres = movies.getGenres()

    print("Computing content-based similarity matrix...")

    itemCount = self.trainset.n_items
    self.similarities = np.zeros((itemCount, itemCount))
    for first in range(itemCount):
        if first % 100 == 0:
            print(first, " of ", itemCount)
        for second in range(first + 1, itemCount):
            firstMovieID = int(self.trainset.to_raw_iid(first))
            secondMovieID = int(self.trainset.to_raw_iid(second))
            # Only score pairs where both movies actually carry genre data;
            # everything else stays at the zero the matrix was initialized with.
            if len(genres[firstMovieID]) > 0 and len(genres[secondMovieID]) > 0:
                score = self.computeGenreSimilarity(firstMovieID, secondMovieID, genres)
                self.similarities[first, second] = score
                self.similarities[second, first] = score
    print("...done.")
    return self
def fit(self, trainset):
    """Precompute item-item similarities as genre x year x mise-en-scene products."""
    AlgoBase.fit(self, trainset)

    ml = MovieLensData()
    genres = ml.returnGenres()
    years = ml.returnYears()
    mes = ml.returnMES()

    itemCount = self.trainset.n_items
    self.similarities = np.zeros((itemCount, itemCount))
    for left in range(itemCount):
        for right in range(left + 1, itemCount):
            leftMovieID = int(self.trainset.to_raw_iid(left))
            rightMovieID = int(self.trainset.to_raw_iid(right))
            # Product of three independent content-similarity signals.
            genreScore = self.computeGenreSimilarity(leftMovieID, rightMovieID, genres)
            yearScore = self.computeYearSimilarity(leftMovieID, rightMovieID, years)
            mesScore = self.computeMiseEnSceneSimilarity(leftMovieID, rightMovieID, mes)
            combined = genreScore * yearScore * mesScore
            self.similarities[left, right] = combined
            self.similarities[right, left] = combined
    return self
def train(self, trainset):
    """Legacy train entry point: fit a sparse linear (SLIM-style) model."""
    # Here again: call base method before doing anything.
    AlgoBase.train(self, trainset)

    numUsers = self.trainset.n_users
    numItems = self.trainset.n_items

    # Keep only "positive" feedback (ratings above 3) in a sparse matrix.
    likes = sparse.lil_matrix((numUsers, numItems))
    for user, item, value in self.trainset.all_ratings():
        if value > 3:
            likes[user, item] = value

    self.A = st.SparseMatrix(likes)
    self.W = st.train(self.A,
                      l1_ratio=self.l1_ratio,
                      eps=self.eps,
                      n_alphas=self.n_alphas,
                      alphas=self.alphas,
                      positive=self.positive,
                      max_iter=self.max_iter)

    # Dense rating estimates come from A @ W; CSC form for fast column math.
    self.A = sparse.csc_matrix(self.A)
    self.W = sparse.csc_matrix(self.W)
    self.estimator = self.A * self.W
def fit(self, trainset):
    """Baseline recommender: remember the mean of all training ratings."""
    # Always call the base method first.
    AlgoBase.fit(self, trainset)
    # Equivalent to trainset.global_mean, computed explicitly.
    all_values = [r for (_, _, r) in self.trainset.all_ratings()]
    self.the_mean = np.mean(all_values)
    return self
def fit(self, trainset):
    """Fit the base algorithm after widening the rating scale to (1, 13).

    NOTE: a content-profile similarity-matrix computation (combined genre +
    actor profiles per movie pair, mirrored over the symmetric matrix) used
    to live here but is currently disabled; only the base fit runs.
    """
    trainset.rating_scale = (1, 13)
    AlgoBase.fit(self, trainset)
    print("...done.")
    return self
def train(self, trainset):
    """Fit similarities and derive per-user "professional reviewer" indicators.

    Fixes:
    - ``dtype=np.str`` used the deprecated NumPy alias (removed in NumPy
      1.24, raising AttributeError); replaced with the builtin ``str``.
    - ``self.trainset.ur.iteritems()`` is Python-2-only and raises
      AttributeError on a Python 3 dict; replaced with ``items()``.
    """
    AlgoBase.train(self, trainset)
    self.sim = self.compute_similarities()

    # Raw ratings file: userid:movieid:rating:date:comment_level.
    raw_rating_df = pd.read_csv(
        'new_ratings_all.txt',
        sep=':',
        header=None,
        names=['userid', 'movieid', 'rating', 'date', 'comment_level'],
        dtype={
            'movieid': str,
            'userid': str
        })

    # Two raw behavioral signals per user: commenting and rating-date patterns.
    self.user_comment_indictor = self.get_user_comment_indictor(raw_rating_df)
    self.user_rating_date_indictor = self.get_user_rating_date_indictor(raw_rating_df)
    # Combined "professionalism" indicator keyed by raw user id.
    self.user_professional_indictor = self.get_user_professional_indictor(
        self.user_comment_indictor, self.user_rating_date_indictor)

    # Map inner uid -> professionalism std for fast lookup during estimation.
    self.inner_uid_2_professional = {
        iuid: self.user_professional_indictor['std'][self.trainset.to_raw_uid(iuid)]
        for iuid, _ in self.trainset.ur.items()
    }
def fit(self, trainset, movies):
    """Compute a score-weighted mean embedding vector for every user.

    Fixes over the original:
    - each movie record is shallow-copied before the transient ``"score"``
      key is attached, so the caller's shared ``movies`` dict is no longer
      mutated;
    - the weight accumulator now adds ``movie["score"]`` instead of the
      stale ``score`` variable leaked from the previous loop (which made
      every weight equal to the last-seen score);
    - users with no movies present in ``movies`` are skipped instead of
      raising IndexError on an empty lookup.
    """
    AlgoBase.fit(self, trainset)
    self.movies = movies

    mean_user_vectors = {}
    for u in trainset.ur:
        # Collect this user's movies that have embeddings, copying each
        # record so we can attach the rating without mutating self.movies.
        user_movies = {}
        for movieId, score in trainset.ur[u]:
            movieRawId = trainset.to_raw_iid(movieId)
            if movieRawId in self.movies:
                record = dict(self.movies[movieRawId])
                record["score"] = score
                user_movies[movieRawId] = record

        if not user_movies:
            # Nothing embeddable for this user; omit rather than crash.
            continue

        embedding_dim = len(next(iter(user_movies.values()))["embedding"])
        sum_vector = [0.0] * embedding_dim
        weights_sum = 0.0
        for movie in user_movies.values():
            sum_vector = [
                x + y * movie["score"]
                for x, y in zip(sum_vector, movie["embedding"])
            ]
            weights_sum += movie["score"]
        mean_user_vectors[u] = [x / weights_sum for x in sum_vector]

    self.mean_user_vectors = mean_user_vectors
    return self
def fit(self, trainset):
    """Fit baselines and the similarity matrix with progress output disabled."""
    AlgoBase.fit(self, trainset)
    # Silence the similarity-computation progress messages.
    self.verbose = False
    self.bu, self.bi = self.compute_baselines()
    self.sim = self.compute_similarities()
    return self
def fit(self, trainset):
    """Train an RBM on one-hot encoded ratings and cache dense predictions.

    Fix: the user-facing progress message was misspelled ("Procissing
    user"); it now reads "Processing user".
    """
    AlgoBase.fit(self, trainset)

    numUsers = trainset.n_users
    numItems = trainset.n_items

    # One-hot encode each rating into 10 half-star buckets (rating r -> index 2r-1).
    trainingMatrix = np.zeros([numUsers, numItems, 10], dtype=np.float32)
    for (uid, iid, rating) in trainset.all_ratings():
        adjustedRating = int(float(rating) * 2.0) - 1
        trainingMatrix[int(uid), int(iid), adjustedRating] = 1

    # Flatten to 2D: one row per user, one visible node per (item, bucket) pair.
    trainingMatrix = np.reshape(trainingMatrix, [trainingMatrix.shape[0], -1])

    rbm = RBM(trainingMatrix.shape[1], hiddenDimensions=self.hiddenDim,
              learningRate=self.learningRate, batchSize=self.batchSize)
    rbm.Train(trainingMatrix)

    self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
    for uiid in range(trainset.n_users):
        if (uiid % 50 == 0):
            print("Processing user ", uiid)
        recs = rbm.GetRecommendations([trainingMatrix[uiid]])
        recs = np.reshape(recs, [numItems, 10])
        for itemID, rec in enumerate(recs):
            # Softmax over the 10 bucket scores and take the expectation
            # (argmax would tie heavily at 5-star predictions).
            normalized = self.softmax(rec)
            rating = np.average(np.arange(10), weights=normalized)
            # Map the bucket index back onto the half-star rating scale.
            self.predictedRatings[uiid, itemID] = (rating + 1) * 0.5
    return self
def fit(self, trainset):
    """Build the cuisine-based item similarity matrix.

    Bug fix: the inner loop previously looked up
    ``to_raw_iid(thisRating)`` for BOTH ids, so every pair compared an
    item against itself (making all similarities self-similarities); the
    second lookup now correctly uses ``otherRating``.
    """
    AlgoBase.fit(self, trainset)

    lf = LoadFoods()
    cuisines = lf.getCuisines()
    #orderTime = lf.getOrderTime()

    print("Now computing content-based similarity matrix. Please wait ...")

    # Pairwise cuisine similarity; symmetric, so only the upper triangle
    # is computed and then mirrored.
    self.similarities = np.zeros((self.trainset.n_items, self.trainset.n_items))

    for thisRating in range(self.trainset.n_items):
        if (thisRating % 100 == 0 and thisRating != 0):
            print("processed ", thisRating, " of ", self.trainset.n_items, " items")
        for otherRating in range(thisRating + 1, self.trainset.n_items):
            thisFoodID = (self.trainset.to_raw_iid(thisRating))
            otherFoodID = (self.trainset.to_raw_iid(otherRating))  # was thisRating (bug)
            cuisineSimilarity = self.computeCuisineSimilarity(
                thisFoodID, otherFoodID, cuisines)
            #orderTimeSimilarity = self.computeOrderTimeSimilarity(thisFoodID, otherFoodID, orderTime)
            self.similarities[thisRating, otherRating] = cuisineSimilarity
            self.similarities[otherRating, thisRating] = self.similarities[thisRating, otherRating]

    print("done computing the matrix...")
    return self
def fit(self, trainset):
    """Fit the wrapped algorithm, then replace its similarity matrix with an
    agreement/trust-based one."""
    # Always call the base method first.
    AlgoBase.fit(self, trainset)
    self.algo.fit(trainset)

    if self.algo.verbose:
        print(
            'Ignore the above similiary matrix generation message, its not used in this algorithm'
        )
    print('Calculating AgreeTrust matrix ...')

    started = time.time()
    # Trust matrix plus common/non-common co-rating counts from pairwise agreement.
    trust, common, nonCommon = agree_trust_opitmal_a_b(trainset, self.beta,
                                                       self.epsilon,
                                                       self.algo.sim,
                                                       ptype=self.ptype,
                                                       istrainset=True,
                                                       activity=False)
    # Raise trust to lambdak and penalize pairs with few common ratings.
    self.algo.sim = trust**self.lambdak - (self.epsilon * nonCommon)
    print(time.time() - started)
    print('agree_trust_opitmal_a_b fit time')
    return self
def __init__(self, train_data):
    """Set up the candidate model pool and build the full trainset."""
    AlgoBase.__init__(self)
    # Candidate base recommenders, each tagged with a short name.
    baseline = BaselineOnly(bsl_options={
        'method': 'als',
        'n_epochs': 25,
        'reg_u': 5,
        'reg_i': 3
    })
    svd = SVD(lr_all=0.01, n_epochs=25, reg_all=0.2)
    coclustering = CoClustering(n_epochs=3, n_cltr_u=3, n_cltr_i=3)
    knn = KNNBasic(k=40, sim_options={'name': 'cosine', 'user_based': False})
    self.model_selection = [
        ['baselineonly', baseline],
        ['svd', svd],
        ['coClustering', coclustering],
        ['knn', knn],
    ]
    # Per-model evaluation results and fitted models, keyed by name.
    self.model_rmse = {}
    self.model_list = {}
    self.trainset = train_data.build_full_trainset()
def fit(self, trainset):
    """Fit every member algorithm of the ensemble on the same trainset."""
    AlgoBase.fit(self, trainset)
    for member in self.algorithms:
        member.fit(trainset)
    return self
def __init__(self, algorithms, weights):
    """Hybrid recommender blending several algorithms with fixed weights.

    Parameters:
        algorithms: the member algorithms to blend.
        weights: dict of blend weights; the values must sum to 1.

    Fix: the original used an exact floating-point equality test
    (``(sum - 1) != 0``) which spuriously rejects valid weight sets such
    as ten weights of 0.1; a small absolute tolerance is used instead.
    """
    AlgoBase.__init__(self)
    if abs(sum(weights.values()) - 1) > 1e-9:
        raise Exception("Attention, sum of weights need to be 1")
    self.algorithms = algorithms
    self.weights = weights
def __init__(self, cat_products, cat_target, lamb=10000, latent_dimension=10,
             mu=10, lr=0.0005, alpha=0.99, nb_epochs=1, nb_main_epochs=50):
    """Matrix factorization with a category-target (partnership) penalty.

    (Translated from French:) this method tries to make the mean score
    match the performance targets; results with this method are very poor.

    Fix: ``lamb`` and ``mu`` were accepted as parameters but then
    overwritten with hard-coded 10s, silently ignoring caller-supplied
    values (and the documented ``lamb=10000`` default); the constructor
    now honors its arguments.
    """
    AlgoBase.__init__(self)
    self.latent_dimension = latent_dimension
    # Penalty weights: lamb scales the category-exposure term, mu the L2 term.
    self.lamb = lamb
    self.mu = mu
    # Partnership information: product categories and target exposure per category.
    self.cat_products = cat_products
    self.cat_target = cat_target
    # AlgoBase has its own functions and dictionaries to remember known
    # uids and iids, but we keep our own for better control since we do
    # not need the estimate() machinery.
    self.u_to_raw = dict()
    self.i_to_raw = dict()
    self.raw_to_u = dict()
    self.raw_to_i = dict()
    self.number_raw_u = 0
    self.number_raw_i = 0
    # Parameters of the gradient descent.
    self.lr = lr
    self.alpha = alpha
    self.nb_epochs = nb_epochs
    self.nb_main_epochs = nb_main_epochs
def __init__(self, max_rank, lmbda, max_iter=200, verbose=False):
    """Store the factorization hyperparameters.

    Parameters:
        max_rank: maximum rank of the factorization.
        lmbda: regularization strength.
        max_iter: iteration cap for the solver.
        verbose: whether to print progress.
    """
    AlgoBase.__init__(self)
    self.max_rank, self.lmbda = max_rank, lmbda
    self.max_iter, self.verbose = max_iter, verbose
def fit(self, trainset):
    """Compute each user's mean rating; unknown users fall back to the global mean."""
    AlgoBase.fit(self, trainset)
    # defaultdict: lookups for users absent from the trainset yield the global mean.
    self.u_mean = defaultdict(lambda: trainset.global_mean)
    for user, ratings in trainset.ur.items():
        self.u_mean[user] = np.mean([value for _, value in ratings])
    return self
def train(self, trainset):
    """Legacy train entry point: cache the mean of all training ratings."""
    # Always call the base method first.
    AlgoBase.train(self, trainset)
    # Same value as trainset.global_mean, computed explicitly.
    values = [r for (_, _, r) in self.trainset.all_ratings()]
    self.the_mean = np.mean(values)
def fit(self, trainset):
    """Build an item similarity matrix from restaurant type and distance."""
    AlgoBase.fit(self, trainset)

    # Load restaurant-type attributes for every place.
    ml = Reviews()
    typeR = ml.getRestaurantType()

    print("Computing content-based similarity matrix...")

    placeCount = self.trainset.n_items
    self.similarities = np.zeros((placeCount, placeCount))
    for first in range(placeCount):
        if first % 50 == 0:
            print(first, " of ", placeCount)
        for second in range(first + 1, placeCount):
            firstPlaceID = int(self.trainset.to_raw_iid(first))
            secondPlaceID = int(self.trainset.to_raw_iid(second))
            # Combine restaurant-type similarity with geographic proximity.
            typeScore = self.computeTypeSimilarity(firstPlaceID, secondPlaceID, typeR)
            distanceScore = self.computeDistanceSimilarity(firstPlaceID, secondPlaceID)
            combined = typeScore * distanceScore
            self.similarities[first, second] = combined
            self.similarities[second, first] = combined

    print("...done.")
    return self
def fit(self, trainset):
    """Precompute item similarities as genre x year x mise-en-scene products."""
    AlgoBase.fit(self, trainset)

    ml = MovieLens()
    genres = ml.getGenres()
    years = ml.getYears()
    mes = ml.getMiseEnScene()

    print("Computing content-based similarity matrix...")

    itemCount = self.trainset.n_items
    self.similarities = np.zeros((itemCount, itemCount))
    for row in range(itemCount):
        if row % 100 == 0:
            print(row, " of ", itemCount)
        for col in range(row + 1, itemCount):
            movieA = int(self.trainset.to_raw_iid(row))
            movieB = int(self.trainset.to_raw_iid(col))
            genreScore = self.computeGenreSimilarity(movieA, movieB, genres)
            yearScore = self.computeYearSimilarity(movieA, movieB, years)
            mesScore = self.computeMiseEnSceneSimilarity(movieA, movieB, mes)
            combined = genreScore * yearScore * mesScore
            self.similarities[row, col] = combined
            self.similarities[col, row] = combined

    print("...done.")
    return self
def fit(self, trainset):
    """Train an AutoRec autoencoder on normalized ratings and cache predictions."""
    AlgoBase.fit(self, trainset)

    userCount = trainset.n_users
    itemCount = trainset.n_items

    # Dense user x item matrix with ratings scaled into [0, 1].
    normalizedRatings = np.zeros([userCount, itemCount], dtype=np.float32)
    for (uid, iid, rating) in trainset.all_ratings():
        normalizedRatings[int(uid), int(iid)] = rating / 5.0

    # One visible node per item.
    autoRec = AutoRec(normalizedRatings.shape[1],
                      hiddenDimensions=self.hiddenDim,
                      learningRate=self.learningRate,
                      batchSize=self.batchSize,
                      epochs=self.epochs)
    autoRec.Train(normalizedRatings)

    self.predictedRatings = np.zeros([userCount, itemCount], dtype=np.float32)
    for innerUserID in range(trainset.n_users):
        if innerUserID % 50 == 0:
            print("Processing user ", innerUserID)
        reconstructed = autoRec.GetRecommendations([normalizedRatings[innerUserID]])
        for itemID, value in enumerate(reconstructed):
            # Rescale the reconstruction back to the 0-5 star range.
            self.predictedRatings[innerUserID, itemID] = value * 5.0
    return self
def __init__(self, train_data,
             model_to_use=("baselineonly", "svd", "coClustering", "knn")):
    """Initialize the class with the full dataset and a set of base models to use.

    Fix: the default for ``model_to_use`` was a mutable list (shared
    across all calls — a classic Python pitfall); it is now an immutable
    tuple. Callers may still pass any iterable of model names.
    """
    AlgoBase.__init__(self)
    # All supported base recommenders, keyed by short name.
    self.available_models = {
        "baselineonly": BaselineOnly(
            bsl_options={
                "method": "sgd",
                "n_epochs": 30,
                "reg": 0.1,
                "learning_rate": 0.005
            }),
        "svd": SVD(lr_all=0.005, n_factors=50, reg_all=0.1),
        "coClustering": CoClustering(n_epochs=3, n_cltr_u=3, n_cltr_i=3),
        "knn": KNNWithMeans(k=40, sim_options={
            "name": "cosine",
            "user_based": False
        }),
    }
    # Keep only the requested models, in the requested order.
    self.model_selection = [[model, self.available_models[model]]
                            for model in model_to_use]
    # Per-model metrics and fitted-model registry, filled in later.
    self.model_rmse = {}
    self.model_mae = {}
    self.model_list = {}
    self.trainset = train_data.build_full_trainset()
def __init__(self, n_factors=100, n_epochs=20, lr=0.005, reg=0.02, noise=0.01):
    """Initialize the RKMF algorithm with the given hyperparameters.

    (Translated from Spanish.) The remaining model factors are given
    temporary placeholder values here and set properly during fit.

    Parameters:
        n_factors: number of latent factors in the user and item matrices.
        n_epochs: number of Stochastic Gradient Descent epochs.
        lr: learning rate of the model.
        reg: regularization strength of the model.
        noise: scale used when initializing the users-x-factors and
            items-x-factors matrices.
    """
    self.n_factors = n_factors
    self.n_epochs = n_epochs
    self.lr = lr
    self.reg = reg
    self.noise = noise
    # Placeholder kernel bounds/constants; overwritten during fit.
    self.init_low = 0.0
    self.init_high = 5.0
    self.kernel_a = 3.0
    self.kernel_c = 1
    # User and item factor matrices, created during fit.
    self.pu = None
    self.qi = None
    AlgoBase.__init__(self)
def __init__(self, train):
    """Index the training frame, build the tf.data pipeline, and compile the NCF model."""
    # Always call the base constructor before doing anything.
    AlgoBase.__init__(self)
    self.training_args = config['training_args']

    # Integer-index the user and item id columns of the train frame.
    self.indexer = ColumnIndexer(train, ['userId', 'itemId'])
    self.train = self.indexer.transform(train)

    # Distinct user/item counts size the embedding tables.
    self.number_of_users = len(set(train['userId'].values))
    self.number_of_items = len(set(train['itemId'].values))

    # ((user_indices, item_indices), ratings) tuples feed tf.data directly.
    features = (train['userId' + '_indexed'].values,
                train['itemId' + '_indexed'].values)
    train_users_items_ratings = (features, train['rating'].values)
    self.train_dataset = tf.data.Dataset.from_tensor_slices(train_users_items_ratings)

    # Neural collaborative filtering model with four hidden layers,
    # trained with Adam on mean absolute error.
    self.ncf = NeuralCF(self.number_of_users, self.number_of_items,
                        self.training_args.user_dim,
                        self.training_args.item_dim,
                        self.training_args.hidden1_dim,
                        self.training_args.hidden2_dim,
                        self.training_args.hidden3_dim,
                        self.training_args.hidden4_dim)
    self.ncf.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.MeanAbsoluteError())