import time

import pandas as pd
from surprise import Dataset, KNNWithMeans, Reader, accuracy, dump
from surprise.model_selection import cross_validate, train_test_split

# Note: import_reduced_reviews is a project-specific helper assumed to be
# defined or imported elsewhere in this module.


def train_surprise_model():
    # Import reduced dataset:
    df = import_reduced_reviews('C:/Users/lukas/OneDrive/Desktop/Reviews_Reduced.csv')
    df = df[['user_key', 'game_key', 'rating']]

    # Drop duplicate (user, game) ratings:
    df = df.drop_duplicates(subset=['game_key', 'user_key'])

    ### Modelling part with Surprise:
    # Get the data into a format Surprise can work with:
    reader = Reader(rating_scale=(1, 10))
    data = Dataset.load_from_df(df[['user_key', 'game_key', 'rating']], reader)

    # Build the trainset from the whole dataset:
    trainsetfull = data.build_full_trainset()
    print('Number of users: ', trainsetfull.n_users, '\n')
    print('Number of items: ', trainsetfull.n_items, '\n')

    # Parameters:
    sim_option = {'name': 'cosine', 'user_based': False}
    k = 10
    min_k = 5

    algo = KNNWithMeans(k=k, min_k=min_k, sim_options=sim_option)

    # Run fit and time it:
    start_time = time.time()
    algo.fit(trainsetfull)
    print("--- %s seconds ---" % (time.time() - start_time))

    ### Test: is it possible to exchange the similarity matrix?
    # Note: KNNWithMeans indexes algo.sim by the trainset's inner item ids,
    # so the rows/columns of the imported matrix must follow that ordering
    # for the predictions to be meaningful.
    sim_matrix_imported = pd.read_csv(
        '../Data/Recommender/selfmade_item-item-similarity-matrix.csv',
        index_col=0)
    sim_matrix_imported.columns = sim_matrix_imported.columns.astype(int)
    sim_matrix_imported = sim_matrix_imported.to_numpy()

    # Predict once before and once after the swap to compare the estimates:
    a = algo.predict(93681, 100007)
    algo.sim = sim_matrix_imported
    b = algo.predict(93681, 100007)

    # We now need to save the similarity matrix somewhere:
    sim_matrix = algo.sim
    pd.DataFrame(sim_matrix).to_csv(
        '../Data/Recommender/sim_matrix-myKNNWithMeans_item_based_model')

    # Save the precomputed model:
    dump.dump('../Data/Recommender/myKNNWithMeans_item_based_model', algo)
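
# A minimal sketch (this helper is an assumption, not part of the original
# module): reload the model dumped above so it can be reused without
# refitting. surprise.dump.load returns a (predictions, algo) tuple.
def load_surprise_model(path='../Data/Recommender/myKNNWithMeans_item_based_model'):
    _, algo = dump.load(path)
    return algo
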
def collaborative_filtering_using_surprise():
    """
    https://towardsdatascience.com/how-to-build-a-memory-based-recommendation-system-using-python-surprise-55f3257b2cf4
    Predict games for the user with user_key = 93681.
    """
    target_user_key = 93681

    # Import reduced dataset:
    df = import_reduced_reviews()

    # Check for and drop duplicate (user, game) ratings:
    duplicates = len(df) - len(df.drop_duplicates(subset=['game_key', 'user_key']))
    df = df.drop_duplicates(subset=['game_key', 'user_key'])
    print('duplicates removed: ' + str(duplicates))

    # Check out our target user:
    df_target_user = df[df['user_key'] == target_user_key]

    # Build the utility matrix and calculate its sparsity:
    # data_pivot = df.pivot(index='user_key', columns='game_key', values='rating')
    # sparsity = data_pivot.isnull().sum().sum() / data_pivot.size
    # print('Sparsity of utility matrix: ' + str(sparsity))

    ### Modelling part with Surprise:
    # Get the data into a format Surprise can work with:
    reader = Reader(rating_scale=(1, 10))
    data = Dataset.load_from_df(df[['user_key', 'game_key', 'rating']], reader)

    # Split into trainset and testset:
    trainset, testset = train_test_split(data, test_size=0.2)
    print('Number of users: ', trainset.n_users, '\n')
    print('Number of items: ', trainset.n_items, '\n')

    # When Surprise creates a Trainset or Testset object, it takes the raw ids
    # (the ones used in the imported file) and converts them to so-called
    # inner ids (basically a series of integers starting from 0). You might
    # need to trace back to the original names. Using items as an example
    # (the same approach works for users, just swap iids for uids): to get
    # the list of inner iids, use the all_items method; to_inner_iid converts
    # a raw id to an inner id, and to_raw_iid converts back. An example of
    # saving a list of inner and raw item ids:
    trainset_iids = list(trainset.all_items())
    iid_converter = lambda x: trainset.to_raw_iid(x)
    trainset_raw_iids = list(map(iid_converter, trainset_iids))

    ## Model parameters of kNN:
    # Two hyperparameters we can tune:
    # 1. the k parameter
    # 2. the similarity options:
    #    a) user-user vs. item-item
    #    b) the similarity function (cosine, pearson, msd)
    sim_option = {'name': 'pearson', 'user_based': False}

    # Three different KNN models: KNNBasic, KNNWithMeans, KNNWithZScore.
    k = 40
    min_k = 5

    algo = KNNWithMeans(k=k, min_k=min_k, sim_options=sim_option)
    algo.fit(trainset)

    ## Testing:
    predictions = algo.test(testset)
    accuracy.rmse(predictions)

    # Swap in our own similarity matrix and evaluate again:
    sim_matrix_imported = pd.read_csv(
        '../Data/Recommender/selfmade_item-item-similarity-matrix.csv',
        index_col=0)
    sim_matrix_imported.columns = sim_matrix_imported.columns.astype(int)
    sim_matrix_imported = sim_matrix_imported.to_numpy()

    algo.sim = sim_matrix_imported

    predictions = algo.test(testset)
    accuracy.rmse(predictions)

    # Cross-validation:
    skip = True
    if not skip:
        results = cross_validate(algo=algo, data=data, measures=['RMSE'],
                                 cv=5, return_train_measures=True)
        results_mean = results['test_rmse'].mean()
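    # Optional hyperparameter search (a sketch, not part of the original
    # pipeline; it mirrors the skip pattern above). GridSearchCV from
    # surprise.model_selection can tune k and the similarity options jointly:
    skip_grid_search = True
    if not skip_grid_search:
        from surprise.model_selection import GridSearchCV
        param_grid = {'k': [10, 20, 40],
                      'min_k': [1, 5],
                      'sim_options': {'name': ['pearson', 'cosine'],
                                      'user_based': [False]}}
        gs = GridSearchCV(KNNWithMeans, param_grid, measures=['rmse'], cv=5)
        gs.fit(data)
        print('Best RMSE: ', gs.best_score['rmse'])
        print('Best params: ', gs.best_params['rmse'])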
    ## Predictions
    # Let's assume we are happy with the method and now want to apply it to
    # the entire dataset.

    # Estimate a specific item for a specific user:
    single_item_single_user_prediction = algo.predict(
        uid=target_user_key, iid=100010, verbose=True)

    # Estimate all items for a specific user:
    list_of_all_items = trainset_raw_iids
    target_predictions = []

    for item in list_of_all_items:
        single_prediction = algo.predict(uid=target_user_key, iid=item)
        target_predictions.append(
            (single_prediction.uid, single_prediction.iid, single_prediction.est))

    # Then sort the predictions for the user and retrieve the n highest ones:
    target_predictions.sort(key=lambda x: x[2], reverse=True)
    n = 20
    top_n = target_predictions[:n]
    top_n = [row[1] for row in top_n]

    print('end')
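
# A minimal sketch (assumption: this helper does not exist in the original
# repo): the ranking loop above, generalised so it can be reused for any
# user. Returns the n raw item ids with the highest estimated rating,
# together with the estimates.
def top_n_for_user(algo, trainset, uid, n=20):
    raw_iids = [trainset.to_raw_iid(inner_iid) for inner_iid in trainset.all_items()]
    predictions = [algo.predict(uid=uid, iid=iid) for iid in raw_iids]
    predictions.sort(key=lambda p: p.est, reverse=True)
    return [(p.iid, p.est) for p in predictions[:n]]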