def make_recs(self, _id, _id_type='movie', rec_num=5):
    '''
    Recommend movies for a user id, or movies similar to a given movie.

    Input:
    _id - the user or movie id to make recommendations for
    _id_type - 'user' treats _id as a user id; any other value
               (default 'movie') treats it as a movie id
    rec_num - how many recommendations to return on the user paths;
              it is not applied on the movie path

    Output:
    rec_names - the value returned by the rf helper for the chosen
                path (presumably a list or numpy array of movie
                names - confirm against the rf module)
    '''
    if _id_type != 'user':
        # Movie path: the helper decides how many similar movies come back.
        similar_ids = rf.find_similar_movies(_id)
        return rf.get_movie_names(similar_ids)

    if _id in self.ratings_mat.index:
        # Position of the user's row in the user factor matrix.
        row = np.where(self.ratings_mat.index == _id)[0][0]
        preds = np.dot(self.user_mat[row, :], self.movie_mat)

        # Sort best-first, then truncate.  Truncating after the reversal
        # keeps rec_num=0 empty; a trailing [-0:] slice selects everything.
        top = np.argsort(preds)[::-1][:rec_num]
        rec_ids = self.ratings_mat.columns[top]
        return rf.get_movie_names(rec_ids)

    # Unknown user: fall back to the ranked list, built only when needed.
    ranked_movies = create_ranked_df(self.movies, self.ratings_mat)
    return rf.popular_recommendations(_id, rec_num, ranked_movies)
Пример #2
0
    def make_recommendations(self, _id, dot_prod, _id_type='movie', rec_num=5):
        """
        Recommend items for a user id, or items similar to an item id.

        INPUT:
        _id - a user id when _id_type == 'user', otherwise an item id
        dot_prod - forwarded unchanged to rf.find_similar_items
                   (presumably an item similarity matrix - confirm
                   against the rf module)
        _id_type - 'user' selects the user path; any other value
                   (default 'movie') selects the item path
        rec_num - maximum number of recommendations to return

        OUTPUT:
        rec_ids - ids of the recommended items for a known user;
                  None on every other path
        rec_names - recommended item names; None for an unknown item
        message - text describing which path produced the result
        """
        # Bind both results up front so that every path can reach the
        # return statement without an UnboundLocalError.
        rec_ids, rec_names = None, None

        if _id_type == 'user':
            if _id in self.user_ids_series:
                message = 'Glad to see you again! recommended for you:\n'
                row = np.where(self.user_ids_series == _id)[0][0]

                # Predicted ratings: the user's row times the item matrix.
                preds = np.dot(self.user_mat[row, :], self.item_mat)

                # Sort best-first, then truncate; a [-0:] slice would
                # return every item when rec_num is 0.
                top = preds.argsort()[::-1][:rec_num]
                rec_ids = self.items_ids_series[top]
                rec_names = rf.get_item_names(rec_ids, self.df_items,
                                              self.item_id_colname,
                                              self.item_name_colname)
            else:
                message = "Hey, you are new here, this is for you:\n"
                # Unknown user: fall back to the top ranked items.
                rec_names = rf.popular_recommendations(_id, self.ranked_items,
                                                       rec_num)
        elif _id in self.items_ids_series:
            message = 'Similar items for this rated item:\n'
            similar = rf.find_similar_items(_id, self.df_items,
                                            self.item_name_colname,
                                            dot_prod)
            rec_names = list(similar)[:rec_num]
        else:
            # Keep the console notice and also hand the text back.
            message = "Please update the database with this item"
            print(message)

        return rec_ids, rec_names, message
    def make_recs(self, _id, _id_type='movie', rec_num=5):
        """
        Recommend movies for a user id, or movies similar to a movie id.

        INPUT:
        _id - either a user or movie id (int)
        _id_type - "user" selects the user path; any other value is
                   handled as "movie" (str)
        rec_num - number of recommendations to return (int)

        OUTPUT:
        rec_ids - movie ids of the recommendations for a known user;
                  None on every other path
        rec_names - recommended movie names as returned by the rf
                    helpers; None when the movie id is unknown
        """
        rec_ids, rec_names = None, None

        if _id_type == 'user':
            known_users = self.train_data_df.index

            if _id in known_users:
                # Row of this user in the U matrix.
                row = np.where(known_users == _id)[0][0]

                # Predicted ratings: that row of U times the V matrix.
                preds = np.dot(self.user_mat[row, :], self.movie_mat)

                # Sort best-first, then truncate.  Truncating after the
                # reversal keeps rec_num=0 empty, whereas a [-0:] slice
                # selects every movie.
                best = preds.argsort()[::-1][:rec_num]
                rec_ids = self.train_data_df.columns[best]
                rec_names = rf.get_movie_names(rec_ids, self.movies)
            else:
                # Unknown user: give the top ranked movies back instead.
                rec_names = rf.popular_recommendations(_id, rec_num,
                                                       self.ranked_movies)
                print(
                    "Because this user wasn't in our database, we are giving back the top movie "
                    "recommendations for all users.")

        elif _id in self.train_data_df.columns:
            # A movie id was passed: look up similar movies.
            rec_names = list(rf.find_similar_movies(_id,
                                                    self.movies))[:rec_num]
        else:
            print(
                "That movie doesn't exist in our database.  Sorry, we don't have any recommendations for you."
            )

        return rec_ids, rec_names
Пример #4
0
    def make_recs(self, _id, _id_type='movie', rec_num=5):
        '''
        Recommend movies for a user id, or movies similar to a movie id.

        INPUT:
        _id - either a user or movie id (int)
        _id_type - "movie" selects the similar-movie path; any other
                   value is handled as a user id (str) (default 'movie')
        rec_num - number of recommendations to return (int) (default 5)

        OUTPUT:
        rec_ids - (array) ids of the recommended movies, or None when
                  the similar-movie lookup fails
        rec_names - recommended movie names as returned by the rf
                    helpers, or None when the similar-movie lookup fails
        '''
        if _id_type == 'movie':
            try:
                rec_names = rf.find_similar_movies(_id, self.movies)[:rec_num]
                name_mask = self.movies['movie'].isin(rec_names)
                rec_ids = self.movies[name_mask]['movie_id'].values[:rec_num]
            except Exception:
                # Best-effort lookup: any failure is reported as an unknown
                # movie.  Exception (not a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                print('movie not in dataset')
                rec_ids, rec_names = None, None
            return rec_ids, rec_names

        if _id in self.train_data_df.index:
            # Row of this user in user_mat.
            row = np.where(self.train_data_df.index == _id)[0][0]
            # Predicted rating of this user for every movie.
            preds = np.dot(self.user_mat[row, :], self.movie_mat)
            # Positions of the rec_num highest predictions, best first.
            top = np.argsort(preds)[::-1][:rec_num]
            rec_ids = self.train_data_df.columns[top].values
            rec_names = rf.get_movie_names(rec_ids, self.movies)
        else:
            # Unknown user: fall back to the ranked list and map the
            # returned names back to ids through the movies frame.
            rec_names = rf.popular_recommendations(_id, rec_num,
                                                   self.ranked_movies)
            name_mask = self.movies['movie'].isin(rec_names)
            rec_ids = self.movies[name_mask]['movie_id'].values[:rec_num]
            print(
                "Because this user wasn't in our database, we are giving back the top movie recommendations for all users."
            )
        return rec_ids, rec_names
    def make_recommendations(self, _id, _id_type='movie', rec_num=5):
        """
        Recommend movies for a user id, or movies similar to a movie id.

        INPUT:
        _id - either a user or movie id
        _id_type - "user" selects the user path; any other value is
                   handled as "movie" (default 'movie')
        rec_num - number of recommendations to return (default 5)

        OUTPUT:
        rec_ids - movie ids of the recommendations for a known user;
                  None on every other path
        rec_names - recommended movie names as returned by the rf
                    helpers; None when the movie id is unknown
        """
        rec_ids, rec_names = None, None

        if _id_type == 'user':
            if _id in self.user_ids_series:
                # Row of this user in the U matrix.
                row = np.where(self.user_ids_series == _id)[0][0]

                # Predicted ratings: that row of U times the V matrix.
                preds = np.dot(self.user_matrix[row, :], self.movie_matrix)

                # Sort best-first, then truncate; a [-0:] slice would
                # return every movie when rec_num is 0.
                best = preds.argsort()[::-1][:rec_num]
                rec_ids = self.movie_ids_series[best]
                rec_names = rf.get_movie_names(rec_ids, self.movies)
            else:
                # Unknown user: give the top ranked movies back instead.
                rec_names = rf.popular_recommendations(_id, rec_num,
                                                       self.ranked_movies)
                print(
                    "Because this user wasn't in our database, we are giving back the top movie recommendations for all users. (Cold Start Problem)"
                )

        elif _id in self.movie_ids_series:
            # A movie id was passed: look up similar movies.
            rec_names = list(rf.find_similar_movies(_id,
                                                    self.movies))[:rec_num]
        else:
            print(
                "That movie doesn't exist in our database.  Sorry, we don't have any recommendations for you."
            )

        return rec_ids, rec_names
Пример #6
0
    def make_recs(self,_id, _id_type='user', rec_num=5):
        '''
        Recommend movies for a user id, or movies similar to a movie id.

        INPUT:
        _id - either a user or movie id (int)
        _id_type - "user" selects the user path; any other value is
                   handled as "movie" (str) (default 'user')
        rec_num - number of recommendations to return (int)

        OUTPUT:
        rec_ids - movie ids of the recommendations for a known user;
                  None on every other path
        rec_names - recommended movie names as returned by the rf
                    helpers; None when the movie id is unknown
        '''
        rec_ids, rec_names = None, None

        if _id_type == 'user':
            if _id in self.user_ids_series:
                # Row of this user in the U matrix.
                row = np.where(self.user_ids_series == _id)[0][0]

                # Predicted ratings: that row of U times the V matrix.
                preds = np.dot(self.user_mat[row, :], self.movie_mat)

                # Sort best-first, then truncate; a [-0:] slice would
                # return every movie when rec_num is 0.
                best = preds.argsort()[::-1][:rec_num]
                rec_ids = self.user_item_df.columns[best]
                rec_names = rf.get_movie_names(rec_ids, self.movies)
            else:
                # Unknown user: give the top ranked movies back instead.
                rec_names = rf.popular_recommendations(_id, rec_num, self.ranked_movies)

        elif _id in self.movie_ids_series:
            # A movie id was passed: look up similar movies.
            rec_names = list(rf.find_similar_movies(_id, self.movies))[:rec_num]
        else:
            print("That movie doesn't exist in our database.  Sorry, we don't have any recommendations for you.")

        return rec_ids, rec_names
    def make_recommendations(self,
                             _id,
                             dot_prod_user,
                             tfidf_matrix,
                             _id_type='item',
                             rec_num=5):
        """
        Recommend items for a user id, or items similar to an item id.

        If _id_type == 'user':
        _id is handled as a user id. A user found in
        self.user_ids_series gets the items with the highest ratings
        predicted from the factor matrices, plus the results of the
        user-user helpers in rf. Any other user gets the result of
        rf.popular_recommendations (ranked based recommendation).

        For any other _id_type (default 'item'):
        _id is handled as an item id. An item found in
        self.items_ids_series gets the result of rf.find_similar_items.
        Any other item gets no recommendation, only a message asking
        to update the data with this item.

        Input:
        - _id: either a user or item id (int)
        - dot_prod_user: forwarded unchanged to rf.find_similar_user
          (presumably a user similarity matrix - confirm against rf)
        - tfidf_matrix: forwarded unchanged to rf.find_similar_items
        - _id_type: either 'user' or 'item', Default:'item' (str)
        - rec_num: number of recommendations that you want
          Default:5 (int)

        Output:
        - rec_ids: recommendation ids, None for an unknown item
        - rec_names: recommendation names, None for an unknown item
        - message: a personalized message
        - rec_user_user_ids: result of rf.find_similar_user for a known
          user, None on every other path
        - rec_user_item_names: result of rf.user_user_cf for a known
          user, None on every other path
        """
        # Only the known-user path fills these two in.
        rec_user_user_ids = None
        rec_user_item_names = None

        if _id_type == 'user':
            if _id in self.user_ids_series:
                message = 'Glad to see you again! recommended for you:\n'
                row = np.where(self.user_ids_series == _id)[0][0]

                # Predicted ratings: the user's row times the item matrix.
                preds = np.dot(self.user_mat[row, :], self.item_mat)

                # Sort best-first, then truncate; a [-0:] slice would
                # return every item when rec_num is 0.
                top = preds.argsort()[::-1][:rec_num]
                rec_ids = self.items_ids_series[top]
                rec_names = rf.get_item_names(rec_ids, self.df_items,
                                              self.item_id_colname,
                                              self.item_name_colname)

                rec_user_user_ids = rf.find_similar_user(
                    _id, self.df_reviews, self.user_id_colname, dot_prod_user)

                rec_user_item_names = rf.user_user_cf(rec_user_user_ids,
                                                      self.user_item_df,
                                                      self.df_reviews,
                                                      self.item_id_colname,
                                                      self.item_name_colname)
            else:
                message = "Hey, you are new here, this is for you:\n"
                # Unknown user: fall back to the top ranked items.
                rec_ids = rf.popular_recommendations(_id, self.ranked_items,
                                                     self.item_id_colname,
                                                     rec_num)

                rec_names = rf.get_item_names(rec_ids, self.df_items,
                                              self.item_id_colname,
                                              self.item_name_colname)

        elif _id in self.items_ids_series:
            name_item_for_message = rf.get_item_names(
                [_id], self.df_items, self.item_id_colname,
                self.item_name_colname)

            message = (f"Similar items for id:{_id}, corresponding to "
                       f"{name_item_for_message[0]}:\n")

            rec_ids = (rf.find_similar_items(_id, self.df_items,
                                             self.item_id_colname,
                                             tfidf_matrix))[:rec_num]

            rec_names = rf.get_item_names(rec_ids, self.df_items,
                                          self.item_id_colname,
                                          self.item_name_colname)

        else:
            # The trailing space matters: without it the two adjacent
            # literals join as "please makesure".
            message = (
                "We can't make recommendation for this item, please make "
                "sure the data was updated with this item.\n")
            rec_ids = None
            rec_names = None

        return rec_ids, rec_names, message, rec_user_user_ids, rec_user_item_names
Пример #8
0
	def make_recommendations(self, _id, _id_type='item', rec_num=5, latent_features=12, learning_rate=0.001, iters=10):
		"""
		Train FunkSVD on the ratings of ``_id`` and recommend items.

		Every call stores the hyper-parameters on ``self``, rebuilds the
		user-item matrix from the rows of ``self.user_item_grouped``
		whose user id equals ``_id``, runs ``iters`` passes of FunkSVD
		over that matrix and rebuilds ``self.ranked_items`` through
		``rf.ranked_df``.

		If ``_id`` is found among the user ids, the items with the
		highest predicted rating are returned. Otherwise the result of
		``rf.popular_recommendations`` is returned (ranked based
		recommendation).

		Input:
		- _id: the user id to recommend for
		- _id_type: accepted but never read by this method; ``_id`` is
		always handled as a user id. Default:'item' (str)
		- rec_num: number of recommendations that you want
		Default:5 (int)
		- latent_features: number of latent features of the two factor
		matrices. Default:12 (int)
		- learning_rate: step size of each gradient update
		Default:0.001 (float)
		- iters: number of passes over the ratings. Default:10 (int)

		Output:
		- recommendation ids
		- recommendation names
		- and a personalized message
		"""
		self.latent_features = latent_features
		self.learning_rate = learning_rate
		self.iters = iters

		user_item_reset = self.user_item_grouped.reset_index()
		self.user_ids = user_item_reset[self.user_id_colname].unique()

		# Keep only the ratings of the requested user and pivot them into
		# a user-by-item frame.
		current_user = (
			user_item_reset[user_item_reset[self.user_id_colname] == _id]
		)
		current_user = (
			current_user.groupby(
				[self.user_id_colname, self.item_id_colname]
			)[self.rating_col_name].max()
		)
		current_user_item_df = current_user.unstack()
		self.current_user_item_df = current_user_item_df
		self.user_item_mat = np.array(self.current_user_item_df)

		# Set up some useful values for later
		self.n_users = self.user_item_mat.shape[0]
		self.n_items = self.user_item_mat.shape[1]
		self.num_ratings = np.count_nonzero(~np.isnan(self.user_item_mat))

		self.user_ids_series = np.array(user_item_reset[self.user_id_colname].unique())
		self.items_ids_series = np.array(user_item_reset[self.item_id_colname].unique())

		print('Train data with Funk Singular Value Decomposition...')
		#### FunkSVD ####

		# initialize the user and item matrices with random values
		user_mat = np.random.rand(self.n_users, self.latent_features)
		item_mat = np.random.rand(self.latent_features, self.n_items)

		print("Iterations \t\t Mean Squared Error ")

		for iteration in range(self.iters):
			sse_accum = 0

			for i in range(self.n_users):
				for j in range(self.n_items):

					# if the rating exists (so we train only on non-missval);
					# a NaN entry compares False here and is skipped
					if self.user_item_mat[i, j] > 0:
						# compute the error as the actual minus the dot
						# product of the user and item latent features
						diff = (
							self.user_item_mat[i, j]
							- np.dot(user_mat[i, :], item_mat[:, j])
						)
						# Keep track of the sum of squared errors for the
						# matrix
						sse_accum += diff**2

						# The item update reads the user value that was
						# just updated; the statement order is kept as is
						# because it affects the numeric result.
						for k in range(self.latent_features):
							user_mat[i, k] += (
								self.learning_rate * (2*diff*item_mat[k, j])
							)

							item_mat[k, j] += (
								self.learning_rate * (2*diff*user_mat[i, k])
							)

			# A user without any rating leaves num_ratings at 0; report
			# NaN instead of dividing by zero.
			mse = sse_accum/self.num_ratings if self.num_ratings else float('nan')
			print(f"\t{iteration+1} \t\t {mse} ")

			self.mse = mse

		# Create ranked items
		self.ranked_items = rf.ranked_df(self.df_reviews,
										 self.item_id_colname,
										 self.rating_col_name,
										 self.date_col_name)

		if _id in self.user_ids_series:
			message = 'Glad to see you again! recommended for you:\n'

			# The factor matrices follow the rows and columns of
			# current_user_item_df, so positions have to be resolved
			# against that frame's labels and not against the unique-id
			# arrays of the whole data set.
			row_labels = np.asarray(current_user_item_df.index)
			col_labels = np.asarray(current_user_item_df.columns)
			idx = np.where(row_labels == _id)[0][0]

			# predict items
			# take the dot product of that row and the V matrix
			preds = np.dot(user_mat[idx, :], item_mat)

			# Sort best-first, then truncate; a [-0:] slice would return
			# every item when rec_num is 0.
			indices = preds.argsort()[::-1][:rec_num]
			rec_ids = col_labels[indices]
			rec_names = rf.get_item_names(rec_ids,
										  self.df_items,
										  self.item_id_colname,
										  self.item_name_colname)

		else:

			message = "Hey, you are new here, this is for you:\n"
			# if we don't have this user, give just top ratings back
			rec_ids = rf.popular_recommendations(_id,
												 self.ranked_items,
												 self.item_id_colname,
												 rec_num)

			rec_names = rf.get_item_names(rec_ids,
										  self.df_items,
										  self.item_id_colname,
										  self.item_name_colname)

		return rec_ids, rec_names, message