class SVDModel:
    """Thin wrapper around surprise's SVD algorithm."""

    def __init__(self):
        # Underlying surprise estimator plus a human-readable name.
        self.model = SVD()
        self.name = 'Singular Value Decomposition'

    def best_estimator_gridsearchCV(self,
                                    data,
                                    n_epochs=(5, 10),
                                    lr_all=(0.002, 0.005),
                                    reg_all=(0.4, 0.5),
                                    cv=3):
        """Grid-search SVD hyper-parameters on *data*.

        Each keyword argument is an iterable of candidate values; returns
        the best parameter combination by RMSE.
        """
        # Tuple defaults avoid the shared-mutable-default pitfall; convert
        # to lists for the grid.
        param_grid = {
            'n_epochs': list(n_epochs),
            'lr_all': list(lr_all),
            'reg_all': list(reg_all)
        }
        # BUG FIX: surprise's GridSearchCV expects the algorithm *class*
        # (it instantiates it per parameter combination); passing the
        # self.model instance fails at fit time.
        gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=cv)
        gs.fit(data)
        # BUG FIX: the result was never assigned, so `return params`
        # raised NameError.
        params = gs.best_params['rmse']
        return params

    def train(self, *args, **kwargs):
        """Fit the underlying model (pass-through to SVD.fit)."""
        self.model.fit(*args, **kwargs)

    def predict(self, *args, **kwargs):
        """Predict a rating (pass-through to SVD.predict).

        BUG FIX: the original discarded the prediction; return it.
        """
        return self.model.predict(*args, **kwargs)

    def test(self, *args, **kwargs):
        """Evaluate on a testset (pass-through to SVD.test)."""
        return self.model.test(*args, **kwargs)
def fill_missing_svd(df_train,df_test):
    """Fill zero (missing) cells of df_train/df_test with SVD-predicted ratings.

    Mutates both frames in place and returns them.

    NOTE(review): the model is trained from a hard-coded absolute CSV path,
    not from the frames passed in — confirm this is intentional.
    """
    # Training ratings; item id 100 is excluded — TODO confirm why.
    df = pd.read_csv(
        '/Users/ronlitman/Ronlitman/University/Statistic/שנה א׳ - סמט׳ א׳/למידה סטטיסטית/Netflix/df_join.csv')

    df = df[df.iid != 100]
    reader = Reader(rating_scale=(1.0, 5.0))
    data = Dataset.load_from_df(df[['uid', 'iid', 'rating']], reader)
    trainset = data.build_full_trainset()
    algo = SVD()

    print('fitting SVD')
    algo.fit(trainset)

    print('filling train set')
    # Positional indices (i, j) are used directly as raw user/item ids —
    # assumes rows/columns align with uid/iid; TODO confirm.
    for i in range(df_train.shape[0]):
        for j in range(df_train.shape[1]):
            if (df_train.iloc[i,j] == 0):
                df_train.iloc[i, j] = (algo.predict(i, j).est)

    print('filling test set')
    # Test users are presumably offset by 10000 after the train users —
    # confirm against the data layout.
    for i in range(df_test.shape[0]):
        for j in range(df_test.shape[1]):
            if (df_test.iloc[i,j] == 0):
                df_test.iloc[i, j] = (algo.predict(i + 10000, j).est)

    return df_train, df_test
def get_recommended_movies(user_id):
    '''
    Get up to 10 recommended movies based on predicted ratings.
    Input: user id (integer)
    Output: a list of [title, id] pairs (the original docstring said
    "dataframe", but .values.tolist() is returned).

    NOTE(review): relies on module globals RATINGS, MOVIE_ID_SORT,
    FULL_META and SVD — SVD is presumably a *fitted* model instance
    despite the class-like name; confirm.
    '''
    already_watched = list(RATINGS[RATINGS['userId'] == user_id]['movieId'])
    predicted_est = {}
    id_set = []
    # Already-watched movies get score 0 so they sink to the bottom.
    for i in MOVIE_ID_SORT:
        if i not in already_watched:
            predicted_est[i] = SVD.predict(user_id, i).est
        else:
            predicted_est[i] = 0
    predicted_est = sorted(predicted_est.items(),
                           key=lambda x: x[1],
                           reverse=True)
    # Keep the 10 best ids that exist in the metadata table.
    # NOTE(review): the loop scans the whole list without breaking once 10
    # are collected — correct but wasteful.
    for i in predicted_est:
        if len(id_set) < 10:
            if i[0] in FULL_META['id'].to_list():
                id_set.append(i[0])
    recommendation = FULL_META[FULL_META['id'].isin(id_set)][[
        'title', 'id'
    ]].values.tolist()
    return recommendation
示例#4
0
def boost(examples, rounds=10):
    """AdaBoost-style ensemble of SVD recommenders.

    Runs *rounds* boosting iterations, each resampling the examples by the
    current weight distribution, fitting an SVD model and re-weighting by
    absolute prediction error. Returns a final voting hypothesis.

    NOTE(review): relies on module globals `l` (example count), and
    `normalize`, `draw`, `sign` helpers defined elsewhere in this file.
    """
    distr = normalize([1.] * l)
    hypotheses = [None] * rounds
    alpha = [0] * rounds

    for t in range(rounds):

        # Resample a training set based on the weight distribution.
        # NOTE(review): this overwrites `examples` in place while still
        # sampling from it, which biases later draws — confirm intent.
        for i in range(l):
            examples[i] = examples[draw(distr)]

        # Build a surprise trainset from the resampled examples.
        reader = Reader()
        data = Dataset.load_from_df(examples, reader)
        trainset = data.build_full_trainset()

        # Fit an SVD model.
        # BUG FIX: the original fused two statements into the invalid
        # `algo = SVD()algo.train(trainset)` (a syntax error); also
        # .train() is the removed pre-1.0 surprise API — use .fit().
        algo = SVD()
        algo.fit(trainset)
        hypotheses[t] = algo

        # Absolute prediction error per example.
        # BUG FIX: `abserr` was never initialised and `math.abs` does not
        # exist — use the builtin abs().
        abserr = [0.0] * l
        for i in range(l):
            abserr[i] = abs(examples.at[i, 'rating'] -
                            algo.predict(examples.at[i, 'user_id'],
                                         examples.at[i, 'business_id']).est)

        # Weighted error of this round's hypothesis.
        # BUG FIX: the original filtered on `abserr > delta` with `delta`
        # still undefined (NameError, and a list/float comparison); use
        # the plain weighted error instead.
        delta = sum(d * e for d, e in zip(distr, abserr))
        # -1 where the error exceeds the weighted mean error, +1 otherwise.
        hypRes = np.where(np.array(abserr) > delta, -1, 1)
        alpha[t] = 0.5 * math.log((1 - delta) / (.0001 + delta))

        # Re-weight: up-weight examples this hypothesis got wrong.
        distr = normalize([d * math.exp(-alpha[t] * h)
                           for (d, h) in zip(distr, hypRes)])

    def finalHypothesis(x):
        # Weighted vote of all round hypotheses.
        return sign(sum(a * h(x) for (a, h) in zip(alpha, hypotheses)))

    return finalHypothesis
def personalized_shows(username):
    """Return show details for up to 12 top SVD-predicted unseen shows.

    NOTE(review): Dataset.split() and evaluate() were removed in
    scikit-surprise 1.1 — this requires an old surprise version.
    Relies on module global rating_path, Django models Show / Show_Rating
    and the get_show_details() helper.
    """
    ratings = pd.read_csv(rating_path)
    reader = Reader()
    data = Dataset.load_from_df(ratings[['username', 'show_id', 'rating']],
                                reader)
    data.split(n_folds=10)
    svd = SVD()
    # NOTE(review): evaluate() cross-validates; the model keeps only the
    # last fold's fit — there is no explicit full-trainset fit here.
    evaluate(svd, data, measures=['RMSE'])
    temp = []
    obj = Show.objects.all()
    # Predict a rating for every show in the catalogue.
    for i in obj:
        temp.append(
            [i.show_id, i.show_title,
             svd.predict(username, i.show_id).est])
    # Highest predicted rating first.
    temp.sort(key=lambda x: x[2], reverse=True)
    ans = []
    rated = Show_Rating.objects.filter(username=username)
    already_rated = []
    for i in rated:
        already_rated.append(i.show_id)
    j = 0
    # Collect unseen shows; `j > 11` means up to 12 titles are kept.
    for i in temp:
        if (j > 11):
            break
        if (i[0] not in already_rated):
            ans.append(i[1])
            j += 1

    final = []
    for i in ans:
        final.append(get_show_details(i))
    return final
def get_collab_recommendation(dataset, userid):
    """Collaborative-filtering recommendations for *userid*.

    Fits SVD on the full rating table and returns the item ids of the
    nine highest-predicted ratings among items the user has not rated.
    """
    frame = pd.DataFrame(dataset)
    reader = Reader(rating_scale=(1, 5))
    surprise_data = Dataset.load_from_df(
        frame[['userId', 'itemId', 'rating']], reader)
    trainset = surprise_data.build_full_trainset()

    model = SVD()
    model.fit(trainset)

    # Mark every item the user already rated with its rating; 0 = unrated.
    known = np.zeros(trainset.n_items)
    for _, row in frame[frame['userId'] == userid].iterrows():
        known[row['itemId']] = row['rating']

    # Predict a rating for each unrated item: (itemId, predicted rating).
    scored = [(item, model.predict(userid, item).est)
              for item, rating in enumerate(known) if rating == 0]

    # Highest predicted rating first; keep the 9 best item ids.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [item for item, _ in scored[:9]]
class CollaborativeFiltering:
    """SVD-based collaborative filtering over movie/user rating tables."""

    def __init__(self, rating_df, user_df, movie_df, movie_sim_matrix=None):
        # Raw inputs; movie_sim_matrix is optional and unused by the
        # methods visible here.
        self._df = movie_df
        self._rating_df = rating_df
        self._movie_sim = movie_sim_matrix
        # movie x user rating matrix placeholder (not filled by calculate()).
        self._theta_m_u = np.zeros([movie_df.shape[0], user_df.shape[0]],
                                   dtype=np.float32)
        # self._x_m_n = np.zeros([df.shape[0], feature_n], dtype=np.float32)
        # Reverse mappings: raw id -> positional index.
        self._movie_indices = pd.Series(movie_df.index, index=movie_df['id'])
        self._user_indices = pd.Series(user_df.index,
                                       index=user_df['id'])  # build the reverse mapping
        self._algo = SVD()

    def _get_sim_user(self, user_id, top_n=10):
        # TODO: similar-user lookup not implemented yet.
        pass

    def _get_sim_movie(self, movie_id, top_n=10):
        # TODO: similar-movie lookup not implemented yet.
        pass

    def calculate(self):
        """Fit the SVD model on the full rating set."""
        # for item in self._rating_df.to_numpy():
        #     self._theta_m_u[self._movie_indices[int(item[0])], self._user_indices[int(item[1])]] = item[2]

        # cosine_similarity(self._theta_m_u.T, self._theta_m_u.T)
        # cosine_similarity(self._theta_m_u, self._theta_m_u)
        reader = Reader()
        data = Dataset.load_from_df(
            self._rating_df[['user_id', 'movie_id', 'score']], reader)
        trainset = data.build_full_trainset()
        self._algo.fit(trainset)

    def get_results(self, user_id, movie_id):
        """Predicted rating of *movie_id* for *user_id* (call calculate() first)."""
        return self._algo.predict(user_id, movie_id).est
def svd(data, kwargs):
    """Fit an SVD model on *data* and return the predicted 10000x1000 matrix.

    kwargs supplies 'k_features', 'maxiter', 'lr_pu', 'lr_qi', 'reg_bu',
    'reg_qi' (the lr/reg/k entries are sequences; the first element is
    used) and 'random_seed'.
    """
    n_factors     = kwargs.get('k_features')
    n_epochs      = kwargs.get('maxiter')
    lr_pu         = kwargs.get('lr_pu')
    lr_qi         = kwargs.get('lr_qi')
    reg_bu        = kwargs.get('reg_bu')
    reg_qi        = kwargs.get('reg_qi')

    # BUG FIX: the options were passed positionally, so lr_pu[0] landed on
    # SVD's third parameter `biased`, and the rest shifted likewise
    # (signature: n_factors, n_epochs, biased, init_mean, ...).  Pass
    # everything by keyword.
    algo = SVD(n_factors=n_factors[0],
               n_epochs=n_epochs,
               lr_pu=lr_pu[0],
               lr_qi=lr_qi[0],
               reg_bu=reg_bu[0],
               reg_qi=reg_qi[0],
               random_state=kwargs['random_seed'])

    # Train the algorithm on the data (expects a surprise trainset).
    algo.fit(data)

    # Predict the full matrix; raw ids are 1-based strings.
    prediction = np.zeros([10000, 1000])
    for row in range(10000):
        for col in range(1000):
            prediction[row, col] = algo.predict(str(row + 1), str(col + 1)).est

    return prediction
def generate_recommendations():
    """Train SVD on review stars and write high-confidence recommendations
    (predicted rating > 4.2) for active users (> 12 reviews) to a CSV."""
    file_path = os.path.expanduser('./data/reviews_stars.csv')
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_file(file_path, reader=reader)

    trainset = data.build_full_trainset()

    model = SVD(n_factors=5,
                n_epochs=25,
                lr_all=0.006,
                reg_all=0.2,
                biased=True)
    model.fit(trainset)

    reviews = pd.read_csv('./data/reviews_stars.csv',
                          header=None,
                          names=['user', 'business', 'review'])
    all_businesses = reviews['business'].unique()
    counts_per_user = reviews.groupby('user')['business'].nunique()

    with open('./data/collaborative_recomendations.csv', 'w') as out:
        for user, n_reviewed in counts_per_user.items():
            for business in all_businesses:
                # Only recommend to users with a meaningful review history.
                if n_reviewed > 12:
                    pred = model.predict(user, business)
                    if pred.est > 4.2:
                        out.write(user + ',' + business + ',' +
                                  str(pred.est) + '\n')
            # Progress marker: one line per processed user.
            print(user)
示例#10
0
def predict_movie(user_id, movie_list, df):
    """Print the user's 5-star history and the top-10 SVD recommendations.

    NOTE(review): relies on module globals df_title (titles indexed by
    Movie_Id) and drop_movie_list (ids to exclude).  Prints results and
    returns None.
    """
    # Historical preference: movies the user rated 5 stars.
    temp_usr = df[(df['Cust_Id'] == user_id) & (df['Rating'] == 5)]
    temp_usr = temp_usr.set_index('Movie_Id')
    temp_usr = temp_usr.join(df_title)['Name']
    print("Movies Previously liked by user.....................")
    print(temp_usr[:10])

    # Candidate list: copy of movie_list minus the globally excluded ids.
    user = movie_list.copy()
    user = user.reset_index()
    user = user[~user['Movie_Id'].isin(drop_movie_list)]

    # Train SVD on (at most) the first 1,000,000 rating rows.
    reader = Reader()
    data = Dataset.load_from_df(
        df[['Cust_Id', 'Movie_Id', 'Rating']][:1000000], reader)

    trainset = data.build_full_trainset()
    svd = SVD()
    svd.fit(trainset)

    # Score every candidate movie for this user.
    user['Estimate_Score'] = user['Movie_Id'].apply(
        lambda x: svd.predict(user_id, x).est)

    user = user.drop('Movie_Id', axis=1)

    user = user.sort_values('Estimate_Score', ascending=False)
    print("Recommended Movies for User are as follows.........\n")
    print(user.head(10))
示例#11
0
def train_test(df):
    """Fit SVD on the first 80% of *df* and return RMSE on the last 20%.

    NOTE(review): relies on a module-level RMSE(predictions, truth) helper.
    NOTE(review): the test loop reads user[0] as the user id, user[2] as
    the item id and column 1 as the true rating, implying a
    [Cust_Id, Rating, Movie_Id] column order in df — yet the training
    columns are selected as Cust_Id/Movie_Id/Rating; confirm against the
    caller.
    """

    ###################### train ######################
    reader = Reader()
    svd = SVD()

    # Hold out the final 20% of rows for testing.
    border_line = int(df.shape[0] / 5)

    data = Dataset.load_from_df(
        df[['Cust_Id', 'Movie_Id', 'Rating']][:-border_line], reader)

    # train 1: cross_validate
    # model_selection.cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

    # train 2: not cross_validate
    trainset = data.build_full_trainset()
    svd.fit(trainset)

    ###################### test ######################
    test_df = df.iloc[-border_line:]

    print('test_df Shape: {}'.format(test_df.shape))
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement.
    data_matrix = np.array(test_df, dtype=int)

    Estimate_Score = []
    for user in data_matrix:
        Score = svd.predict(user[0], user[2]).est
        Estimate_Score.append(Score)

    loss = RMSE(Estimate_Score, data_matrix[:, 1])

    return loss
示例#12
0
def get_new_user_recommend(ratings, movies, USER):
    """Rank not-yet-rated movies for *USER* by SVD-predicted rating.

    Cleans the movies frame (genre list, release year), fits SVD on the
    full ratings set, dumps the model to app/static/dump_file and returns
    the candidate movies sorted by Estimate_Score descending.
    Mutates *movies* in place.
    """
    movies['genres'] = movies['genres'].fillna('[]').apply(literal_eval).apply(
        lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
    # BUG FIX: `x != np.nan` is always True because NaN compares unequal to
    # everything (including itself), so invalid dates became the string
    # 'NaT'; use pd.notna() to detect them properly.
    movies['year'] = (pd.to_datetime(
        movies['release_date'], errors='coerce').apply(
            lambda x: str(x).split('-')[0] if pd.notna(x) else np.nan))
    # BUG FIX: passing `axis` positionally to DataFrame.drop was deprecated
    # and removed in pandas 2.0 — use the keyword.
    movies.drop(movies.columns.difference(
        ['movieId', 'title', 'genres', 'year']),
                axis=1,
                inplace=True)
    movies.set_index('movieId', inplace=True)
    reader = Reader(rating_scale=(0.5, 5))
    data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']],
                                reader)
    svd = SVD(
    )  #(n_factors=160, n_epochs=100, lr_all=0.005, reg_all=0.1) 0.86?
    #cross_validate(svd, data, measures=['RMSE', 'MAE'], cv = 5)
    # Movies this user already rated, joined with their metadata.
    user_ratings = ratings[(ratings['userId'] == USER)]
    user_ratings = user_ratings.set_index('movieId')
    user_ratings = user_ratings.join(movies)
    user_ratings.drop(user_ratings.columns.difference(
        ['movieId', 'title', 'genres', 'year']),
                      axis=1,
                      inplace=True)
    # Candidate movies the user has not rated yet.
    movies_cut = movies[~movies.isin(user_ratings)].dropna()
    trainset = data.build_full_trainset()
    svd.fit(trainset)
    # Persist the fitted model for the web app.
    file_name = os.path.abspath('app/static/dump_file')
    dump.dump(file_name, algo=svd)
    user_predict = movies_cut.copy()
    user_predict = user_predict.reset_index()
    user_predict['Estimate_Score'] = user_predict['movieId'].apply(
        lambda x: svd.predict(USER, x).est)
    user_predict = user_predict.sort_values('Estimate_Score', ascending=False)
    return user_predict
示例#13
0
def surpriseTesting():
    """scikit-surprise library smoke test.

    Loads the builtin ml-100k dataset, prints all training ratings,
    cross-validates SVD and prints one prediction.

    NOTE(review): Dataset.split(), surprise.evaluate() and
    surprise.print_perf() were removed in scikit-surprise 1.1 — this
    function requires an old (< 1.1) version of the library.
    """
    # Load the movielens-100k dataset (download it if needed),
    # and split it into 3 folds for cross-validation.
    data = surprise.Dataset.load_builtin('ml-100k')

    #  reader = surprise.Reader(line_format='user item rating', sep=',')
    #  data = Dataset.load_from_file('temp.csv', reader=reader)

    trainSet = data.build_full_trainset()
    data.split(n_folds=3)
    # Dump every (inner uid, inner iid, rating) triple — very verbose.
    for rating in data.build_full_trainset().all_ratings():
        print(rating)

    print(trainSet.n_items)
    algo = SVD()
    #  algo = KNNBasic()
    algo.fit(trainSet)
    # Evaluate performances of our algorithm on the dataset.
    perf = surprise.evaluate(algo, data, measures=['RMSE', 'MAE'])

    surprise.print_perf(perf)
    uid = str(
        196)  # raw user id (as in the ratings file). They are **strings**!
    iid = str(
        242)  # raw item id (as in the ratings file). They are **strings**!

    # get a prediction for specific users and items.
    pred = algo.predict(uid, iid, r_ui=-1, verbose=True)
    print(pred.est)
示例#14
0
    def PMFB(self):
        """Biased matrix factorisation (SVD with bias terms) evaluation.

        Fits SVD on self.trainset, predicts each user's ratings, stores the
        predictions in self.df_est, the user ids in self.arr, and computes
        self.PMFWB_ndcg_ via self.Calculate_NDCG().

        NOTE(review): reads self.trainset, self.list (user ids) and
        self.data (a frame with uid/lid/rate columns) — all defined
        elsewhere in the enclosing class.
        """
        u_id = []
        I_id = []
        r_ui_ = np.array([])
        _est = np.array([])

        # biased=True makes this the "PMF with biases" variant.
        algo = SVD(n_factors=100,
                   n_epochs=20,
                   biased=True,
                   lr_all=0.005,
                   reg_all=0.02)
        algo.fit(self.trainset)

        for uid in (self.list):
            lids = self.data[self.data.uid == uid]
            a = self.data[self.data.uid == uid]

            # Walk this user's ratings one row at a time.
            # NOTE(review): range(1, len(a)) with rows taken at i-1 skips
            # the user's last rating — confirm the off-by-one is intended.
            for i in range(1, len(a)):
                lid = lids[i - 1:i].lid.values[0]
                r_ui = lids[i - 1:i].rate.values[0]
                pred = algo.predict(uid, lid, r_ui, verbose=True)
                u_id.append(int(pred.uid))
                I_id.append(int(pred.iid))
                r_ui_ = np.append(r_ui_, pred.r_ui)
                _est = np.append(_est, pred.est)

        # Collect (user, item, true rating, estimate) rows for NDCG.
        self.df_est = pd.DataFrame({
            'uid': u_id,
            'Iid': I_id,
            'r_ui': r_ui_,
            'est': _est
        })
        self.arr = self.df_est['uid'].unique()

        self.PMFWB_ndcg_ = self.Calculate_NDCG()
示例#15
0
def do(user_i, df):
    """Predict a rating for every known place and return the best rows.

    Keeps predictions with estimated rating > 3, sorted descending, and
    returns up to 100 rows with columns name/place/rating/region.
    """
    global item_base
    item_base = set(df["장소"])

    reader = Reader(rating_scale=(1, 5))
    dataset = Dataset.load_from_df(df=df, reader=reader)
    trainset = dataset.build_full_trainset()
    _ = trainset.build_testset()  # kept for parity with original; unused
    model = SVD(n_factors=100, n_epochs=20)
    model.fit(trainset)

    placeholder_rating = 0
    kept = []
    for place in item_base:
        pred = model.predict(user_i, place, placeholder_rating)
        # pred[3] is the estimated rating (pred.est).
        if pred[3] > 3:
            kept.append(pred)

    frame = pd.DataFrame(kept)
    frame.sort_values(["est"], ascending=False, inplace=True)
    frame = frame.loc[:, ["uid", "iid", "est"]]
    # The raw item id encodes "name*address"; split it into two columns.
    frame["주소"] = frame["iid"].map(lambda item: item.split('*')[1])
    frame["iid"] = frame["iid"].map(lambda item: item.split('*')[0])
    frame.reset_index(drop=True, inplace=True)
    frame.columns = ["name", "place", "rating", "region"]
    return frame[:100]


# def do(user_i, df) :
#     global item_base
#     item_base = set(df["장소"])
#     return svd_recommend(user_i)
示例#16
0
文件: test.py 项目: ssleap/bication
    def model(self, alg_key):
        """Cross-validate the chosen algorithm and print a sample prediction.

        alg_key: 'svd', 'knn' or 'nmf' (case-insensitive).

        NOTE(review): Dataset.split(), evaluate() and .train() belong to
        scikit-surprise < 1.1 — this requires an old library version.
        NOTE(review): an unrecognised alg_key leaves `alg` undefined and
        raises NameError at evaluate(); consider raising ValueError.
        """

        reader = Reader(rating_scale = (1, 5))

        data_result = Dataset.load_from_df(self.make_df()[['user_id', 'place_id', 'score']], reader)

        # split data into 10 folds (the original comment said 5)

        data_result.split(n_folds=10)

        # evaluation

        if alg_key.lower() == "svd":
            alg = SVD()
        elif alg_key.lower() == "knn":
            alg = KNNBasic()
        elif alg_key.lower() == "nmf":
            alg = NMF()

        evaluate(alg, data_result, measures=['RMSE', 'MAE'])

        # prediction
        # user_0	smallShop_5645	2
        test_user = '******'
        test_id = 'smallShop_7089'
        real_score = 4

        trainset = data_result.build_full_trainset()

        # Refit on the full trainset, then predict the sample triple.
        alg.train(trainset)
        print(alg.predict(test_user, test_id, real_score))
示例#17
0
 def craete_personal_value(self):
     """Fit an SVD model on the small-ratings CSV and return it.

     Prints the RMSE on the training data itself as a side effect.
     (The `craete` typo in the name is kept for interface compatibility.)

     NOTE(review): the path uses Windows-style backslashes and sequences
     like '\\c', '\\m' are not valid escapes — confirm the target platform.
     """
     # Reader from the surprise library
     reader = Reader()
     path = os.path.abspath('')
     fname = '\com_dayoung_api\cop\mov\model\data\\ratings_small.csv'
     ratings = pd.read_csv(path + fname, encoding='utf-8')
     data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']],
                                 reader)
     trainset = data.build_full_trainset()
     # Test set is the training data itself, so the RMSE is optimistic.
     testset = trainset.build_testset()
     svd = SVD()
     svd.fit(trainset)
     predictions = svd.test(testset)
     accuracy.rmse(predictions)
     # The next two expressions have no lasting effect (results discarded).
     ratings[ratings['userId'] == 1]
     svd.predict(1, 302, 3)
     return (svd)
示例#18
0
def hybrid_recommendation(title:str, userId: int, df:DataFrame, cosine_sim:np.ndarray, svd:SVD, links:pd.DataFrame) -> pd.DataFrame:
    """Hybrid recommender: content-based candidates re-ranked by SVD.

    NOTE(review): writing through movies['id'].iloc[i] is chained
    assignment — pandas may operate on a copy (SettingWithCopyWarning)
    and the write can be lost; confirm on the pandas version in use.
    NOTE(review): links[...]['movieId'] is a Series (possibly empty), not
    a scalar, yet it is stored into a single cell — confirm intended.
    """
    movies = get_popular_recomandation(title, df, cosine_sim)
    # Map TMDB ids to the MovieLens ids the SVD model was trained on.
    for i in range(len(movies['id'])):
        movies['id'].iloc[i] = links[(links['tmdbId'] == movies['id'].iloc[i])]['movieId']
    # Initialise the estimated-rating column, then fill it per movie.
    movies['est'] = movies['id'].apply(lambda x: 0)
    for i in range(len(movies['id'])):
        movies['est'].iloc[i] = svd.predict(userId, movies['id'].iloc[i]).est
    movies = movies.sort_values('est', ascending=False)
    return movies
示例#19
0
def recomendacion(usuario):
    """Recommend unrated asignaturas for *usuario*, ranked by SVD score.

    Grid-searches SVD hyper-parameters over all ratings, fits the best
    model and returns a list of {'asignatura': obj, 'svd': est} dicts
    sorted by descending predicted score.

    NOTE(review): relies on the Django models Calificacion / Asignatura.
    """
    array = []
    for rate in Calificacion.objects.all():
        array.append([rate.usuario_id, rate.asignatura_id, rate.calificacion])

    df = pd.DataFrame(data=array)
    reader = Reader(rating_scale=(0, 10))
    data = Dataset.load_from_df(df, reader)
    trainingSet = data.build_full_trainset()
    param_grid = {
        'n_factors': [50, 100, 150],
        "n_epochs": [40, 50, 60],
        "lr_all": [0.002, 0.005],
        "reg_all": [0.4, 0.6]
    }

    gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)
    gs.fit(data)
    # Optimal hyper-parameters (by RMSE).
    params = gs.best_params["rmse"]
    SVDoptimized = SVD(n_factors=params['n_factors'],
                       n_epochs=params['n_epochs'],
                       lr_all=params['lr_all'],
                       reg_all=params['reg_all'])
    SVDoptimized.fit(trainingSet)

    # Subjects the user has not rated yet.
    # IMPROVEMENT: set membership replaces the original O(n*m) nested
    # loop over all subjects x user ratings.
    rated_ids = {c.asignatura_id
                 for c in Calificacion.objects.all().filter(usuario_id=usuario.id)}
    asignaturas_SinC = [a for a in Asignatura.objects.all()
                        if a.codigo not in rated_ids]

    # Score every unrated subject for this user.
    asignaturas_rec = [{'asignatura': a,
                        'svd': SVDoptimized.predict(usuario.id, a.codigo).est}
                       for a in asignaturas_SinC]

    # Highest predicted score first.
    asignaturas_rec.sort(reverse=True, key=lambda e: e['svd'])

    return asignaturas_rec
示例#20
0
class SVDCollaborativeFiltering:
    """SVD collaborative filtering over a user/book/rating table.

    Based on the Singular Value Decomposition (SVD) implementation built
    into the surprise library; matrix factorisation reduces the rating
    matrix into lower-dimensional parts, simplifying the calculations.
    """

    def __init__(self, ratings):
        # Surprise does not accept raw data frames as train/test values,
        # so convert through Dataset first, then split 80/20.
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(ratings[['user_id', 'book_id', 'rating']],
                                    reader)

        self.train, self.test = train_test_split(data, test_size=.20)
        self.model = SVD()

    def test_model(self):
        # Checks the predicted values against the test set.
        # Returns a (MAE, RMSE) tuple — the original comment claimed RMSE
        # alone, which was misleading.
        predictions = self.model.test(self.test)
        return accuracy.mae(predictions,
                            verbose=False), accuracy.rmse(predictions,
                                                          verbose=False)

    def train_model(self):
        # Trains the model on the training set (80% of the total ratings data)
        self.model.fit(self.train)

    def predict(self, user_id, books, ratings, already_read=None):
        # Predicts recommended books for a given user

        # Default: treat every book the user has rated as already read.
        if already_read is None:
            already_read = ratings[ratings['user_id'] ==
                                   user_id]['book_id'].unique()

        prediction = books[[
            'book_id', 'title', 'authors', 'average_rating', 'image_url'
        ]].copy()
        prediction = prediction[~prediction['book_id'].isin(already_read)]

        # Predicts a rating for each book and sorts them
        prediction['predict'] = prediction['book_id'].apply(
            lambda x: self.model.predict(user_id, x).est)
        prediction = prediction.sort_values('predict', ascending=False)
        # convert() is defined elsewhere in this module.
        return convert(prediction)

    def save(self, location):
        # Fully saves the model (the whole wrapper object).
        # BUG FIX: the file handle from open() was never closed; use a
        # context manager.
        with open(location, 'wb') as outfile:
            pickle.dump(self, outfile)

    @staticmethod
    def load(location):
        # Loads the model.
        # NOTE: unpickling is only safe on trusted files.
        with open(location, 'rb') as infile:
            return pickle.load(infile)
示例#21
0
def top_recommended_movies_for_user(userId:int, df:DataFrame, svd:SVD, links:DataFrame):
    """Rank every movie in *df* by its SVD-predicted rating for *userId*.

    NOTE(review): writing through movies['id'].iloc[i] is chained
    assignment — pandas may operate on a copy (SettingWithCopyWarning)
    and the write can be lost; confirm on the pandas version in use.
    NOTE(review): links[...]['movieId'] yields a Series, not a scalar,
    yet it is stored into a single cell — confirm intended.
    """
    movies = df.copy()
    # Map TMDB ids to the MovieLens ids the SVD model was trained on.
    for i in range(len(movies['id'])):
        movies['id'].iloc[i] = links[(links['tmdbId'] == movies['id'].iloc[i])]['movieId']

    # Initialise the predicted-rating column, then fill it per movie.
    movies['est'] = movies['id'].apply(lambda x: 0)
    for i in range(len(movies['id'])):
        movies['est'].iloc[i] = svd.predict(userId, movies['id'].iloc[i]).est
    movies = movies.sort_values('est', ascending=False)
    return movies
示例#22
0
class SVD_:
    """Wrapper around surprise's SVD with grid-search support over a
    userId/movieId/rating DataFrame."""

    def __init__(self,
                 data,
                 rating_scale,
                 n_epochs=50,
                 lr_all=.005,
                 reg_all=.02):
        """Build the trainset from *data* and fit an initial model."""
        self.data = data
        self.rating_scale = rating_scale
        self.reader = Reader(rating_scale=self.rating_scale)
        self.model_data = Dataset.load_from_df(
            data.loc[:, ["userId", "movieId", "rating"]], self.reader)
        self.trainset = self.model_data.build_full_trainset()
        self.model = SVD(n_epochs=n_epochs, lr_all=lr_all, reg_all=reg_all)
        print('fitting SVD model...')
        self.model.fit(self.trainset)
        self.grid_search_ = None

    def set_model_params(self, model_params):
        """Rebuild the model from *model_params* (a dict) and refit."""
        print('updating model parameters...')
        # BUG FIX: the dict was passed positionally — it landed on SVD's
        # first parameter (n_factors) — instead of being unpacked into
        # keyword arguments.
        self.model = SVD(**model_params)
        print('fitting SVD model...')
        self.model.fit(self.trainset)
        return self.model

    def update_grid_search(self, gs):
        # Keep the last grid-search object around for later inspection.
        self.grid_search_ = gs

    def fit(self, data):
        """Replace the underlying data and refit the current model."""
        self.data = data
        self.model_data = Dataset.load_from_df(
            data.loc[:, ["userId", "movieId", "rating"]], self.reader)
        self.trainset = self.model_data.build_full_trainset()
        self.model.fit(self.trainset)

    def grid_search(self, grid_params):
        """Run GridSearchCV over *grid_params*, adopt the best RMSE
        parameters and return them."""
        print('grid search...')
        gs = GridSearchCV(SVD, grid_params, measures=["rmse", "mae"], cv=3)
        gs.fit(self.model_data)
        best_params, best_score = gs.best_params["rmse"], gs.best_score["rmse"]
        print(f'Best score (RMSE): {best_score}')
        print(f'Best params (RMSE): {best_params}')

        print(f'Best score (MAE): {gs.best_score["mae"]}')
        # BUG FIX: the label said RMSE while printing the MAE parameters.
        print(f'Best params (MAE): {gs.best_params["mae"]}')

        self.set_model_params(best_params)

        return best_params

    def predict(self, test_data):
        """Predicted rating for each (userId, movieId) row of *test_data*."""
        ratings = test_data.apply(
            lambda x: self.model.predict(x['userId'], x['movieId']).est,
            axis=1)
        return ratings
示例#23
0
def getSVDReco(username):
    """Top-10 SVD-predicted products among the user's candidate items.

    Returns a DataFrame with columns product_id / predictions, sorted by
    descending predicted rating.

    NOTE(review): `SVD` here must be a fitted model *instance* available
    at module level (despite the class-like name) — confirm; also relies
    on the getItemsReco() helper.
    """
    my_recs = []
    items = getItemsReco(username)
    for iid in items:
        my_recs.append((iid, SVD.predict(uid=username, iid=iid).est))
    Result = pd.DataFrame(my_recs,
                          columns=['product_id', 'predictions'
                                   ]).sort_values('predictions',
                                                  ascending=False).head(10)
    #Final_Result = TranslateReco(Result, state, city, cat)
    return Result
示例#24
0
def hybrid_rec(userid, favemovie, n):
    '''this takes in a userid, favemovie and n number of recs and outputs those in a sorted list'''
    # Content-based candidates first, then re-ranked by per-user SVD score.
    # NOTE(review): relies on the module global `data` (a surprise Dataset)
    # and the content_recommendations() helper defined elsewhere.
    rec_hybrid = content_recommendations(favemovie, n)
    # A fresh SVD is trained on every call — consider caching the model.
    svd = SVD(n_factors=50, reg_all=0.05, random_state=150)
    trainset = data.build_full_trainset()
    svd.fit(trainset)
    # Score each candidate (the frame is indexed by movie id) for the user.
    for index, row in rec_hybrid.iterrows():
        pred = svd.predict(userid, index)
        rec_hybrid.at[index, 'score'] = pred.est
    rec_hybrid = rec_hybrid.sort_values('score', ascending=False)
    return rec_hybrid
示例#25
0
def SDV_algo(id):
    """Train a fresh SVD on the module-level dataset and print the ten
    best predicted movies for the hard-coded user 1001.

    Relies on the module globals `data` (surprise Dataset) and
    personalise_movie_list_for_user().
    """
    candidates = personalise_movie_list_for_user(id)
    model = SVD()
    model.fit(data.build_full_trainset())

    # One (item id, predicted rating) pair per candidate movie.
    scored = [(iid, model.predict(uid=1001, iid=iid).est)
              for iid in candidates]

    table = pd.DataFrame(scored, columns=['iid', 'predictions'])
    print(table.sort_values('predictions', ascending=False).head(10))
示例#26
0
 def factorisation(self, n_user, n_item):
     """Return the completed n_user x n_item rating matrix predicted by SVD.

     NOTE(review): the model is only fitted inside cross_validate(), so
     the predictions come from the last CV fold's fit rather than a
     full-trainset fit — confirm this is intended.
     """
     reader = Reader()
     data = Dataset.load_from_df(self.data, reader)
     # Local name shadows the surprise.SVD class with a model instance.
     SVD = surprise.SVD(n_factors=10, n_epochs=10, lr_all=.01, reg_all=.01)
     results = surprise.model_selection.validation.cross_validate(
         SVD, data, measures=['MSE'], cv=3, verbose=True)
     # Now fill the matrix with one prediction per (user, item) pair.
     print("temps d'attente estimé : ", round(n_user * n_item / 105000),
           "secondes.")
     M = []
     for u in range(n_user):
         M.append([SVD.predict(u, i).est for i in range(n_item)])
     return np.array(M)
示例#27
0
def recomendar_colaborativo(usuario):
    """Recommend unrated productos for *usuario*, ranked by SVD score.

    Grid-searches SVD hyper-parameters over all ratings, fits the best
    model and returns a list of {'producto': obj, 'svd': est} dicts
    sorted by descending predicted score.

    NOTE(review): relies on the Django models Calificacion / Producto.
    """
    array = []
    for rate in Calificacion.objects.all():
        array.append([rate.usuario_id, rate.producto_id, rate.calificacion])

    df = pd.DataFrame(data=array)
    reader = Reader(rating_scale=(0, 10))
    data = Dataset.load_from_df(df, reader)
    trainingSet = data.build_full_trainset()
    param_grid = {
        'n_factors': [50, 100, 150],
        "n_epochs": [40, 50, 60],
        "lr_all": [0.002, 0.005],
        "reg_all": [0.4, 0.6]
    }

    gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)
    gs.fit(data)
    # Optimal hyper-parameters (by RMSE).
    params = gs.best_params["rmse"]
    SVDoptimized = SVD(n_factors=params['n_factors'],
                       n_epochs=params['n_epochs'],
                       lr_all=params['lr_all'],
                       reg_all=params['reg_all'])
    SVDoptimized.fit(trainingSet)

    # Products the user has not rated yet.
    # IMPROVEMENT: set membership replaces the original O(n*m) nested
    # loop over all products x user ratings.
    rated_ids = {c.producto_id
                 for c in Calificacion.objects.all().filter(usuario_id=usuario.id)}
    productos_SinC = [p for p in Producto.objects.all()
                      if p.idProducto not in rated_ids]

    # Score every unrated product for this user.
    productos_rec = [{'producto': p,
                      'svd': SVDoptimized.predict(usuario.id, p.idProducto).est}
                     for p in productos_SinC]

    # Highest predicted score first.
    productos_rec.sort(reverse=True, key=lambda e: e['svd'])

    return productos_rec
    def get_pre_rating(self, movie_id_not_rated, user_id):
        """Predict ratings for *user_id* on each movie in *movie_id_not_rated*.

        Returns {movie_id: predicted_rating}.

        NOTE(review): retrains the SVD model from self.base_data on every
        call — expensive; consider caching the fitted model.
        """
        df = self.base_data
        df = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], self.reader)
        trainset = df.build_full_trainset()

        # Build an algorithm, and train it.
        algo = SVD(n_factors=160, n_epochs=100, lr_all=0.005, reg_all=0.1)
        algo.fit(trainset)

        # One prediction per candidate movie.
        rating_pred = {}
        for movie_id in movie_id_not_rated:
            pred = algo.predict(user_id, movie_id)
            rating_pred[movie_id] = pred.est
        return rating_pred
示例#29
0
def rating_predict():
    """Fit SVD on the first 90% of data/fit1.csv and attach each training
    row's predicted rating as a new 'SVD' column.

    NOTE(review): `test` is computed but never used here — presumably the
    function was meant to score it too; confirm.
    """
    df = pd.read_csv('data/fit1.csv')
    # First 90% for training, last 10% for testing.
    # .copy() avoids a SettingWithCopyWarning when the 'SVD' column is
    # added to the slice below.
    train = df[0: df.shape[0] // 10 * 9].copy()
    test = df[df.shape[0] // 10 * 9:]

    # SVD
    col_names = ['user_id', 'item_id', 'rating']
    reader = Reader(rating_scale=(2, 10))
    data = Dataset.load_from_df(train[col_names], reader)
    # BUG FIX: build_full_train_pairsset() does not exist in surprise;
    # the method is build_full_trainset().
    data = data.build_full_trainset()
    algo = SVD()
    algo.fit(data)
    svds = []
    for user_id, book_id in zip(train['user_id'], train['item_id']):
        svds.append(algo.predict(user_id, book_id).est)
    train['SVD'] = svds
def new_recommendations(df, new_ratings):
    """Retrain SVD with the new user's ratings folded in and return every
    known book ranked by its predicted rating for that user.

    Returns a list of (isbn, predicted_rating) tuples, best first.
    """
    base = df[['user_id', 'isbn', 'rating']]
    extra = pd.DataFrame(new_ratings)[['user_id', 'isbn', 'rating']]
    combined = pd.concat([base, extra]).reset_index(drop=True)

    reader = Reader(rating_scale=(1, 5))
    dataset = Dataset.load_from_df(combined, reader)
    trainset, testset = train_test_split(dataset, test_size=.2)

    model = SVD(n_epochs=17, lr_all=.015, reg_all=.125, n_factors=17)
    model.fit(trainset)
    # Held-out predictions; computed for parity with the original but the
    # result is not consumed downstream.
    model.test(testset)

    # The new user is the one whose ratings were just appended.
    target_user = extra.user_id[0]
    # predict(...)[3] is the estimated rating (pred.est).
    scored = [(isbn, model.predict(target_user, isbn)[3])
              for isbn in combined.isbn.unique()]
    return sorted(scored, key=lambda pair: pair[1], reverse=True)