Пример #1
0
def test_tr_te_split():
    """Check ``train_test_split`` on a tiny 4x4 sparse ratings matrix.

    The matrix is assembled from six (user, item, rating) triplets, split
    with ``train_size=0.7`` and ``random_state=42``, and the dense form of
    each half is compared against hard-coded expectations.
    """
    user_idx = [0, 1, 0, 2, 1, 3]
    item_idx = [1, 2, 2, 0, 3, 2]
    ratings = [0.5, 1.0, 0.0, 1.0, 0.0, 1.]
    X = csr_matrix((ratings, (user_idx, item_idx)), shape=(4, 4))

    # Row 1 is expected to be masked out of the training half ...
    expected_train = np.array([
        [0, 0.5, 0, 0],
        [0, 0, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 0],
    ])
    # ... and its rating is the one expected to be held out for testing.
    expected_test = np.array([
        [0, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ])

    train_part, test_part = train_test_split(
        X, train_size=0.7, random_state=42)

    assert_array_almost_equal(train_part.toarray(), expected_train)
    assert_array_almost_equal(test_part.toarray(), expected_test)
Пример #2
0
from reclab.model_selection import train_test_split
from reclab.model_deployment import RecommenderDeployment
from reclab.collab import AlternatingLeastSquares

from scipy import sparse
from sklearn.preprocessing import LabelEncoder
from sklearn.externals import joblib
from numpy.testing import assert_array_equal

import pytest
import warnings
import os

# Load data and split into train/test.
# These module-level objects are shared by the test methods below; the fixed
# random_state keeps the split the same from run to run.
# NOTE(review): `load_lastfm` is not imported in the visible import block --
# confirm it is brought into scope elsewhere in the file.
lastfm = load_lastfm(cache=True, as_sparse=True)
train, test = train_test_split(lastfm.ratings, random_state=42)


class TestRecommenderDeployment(object):
    def test_simple_deployment(self):
        """Deployment wrapper must reproduce the estimator's recommendations.

        Fits a small ALS model on the module-level ``train`` split, asks it
        for recommendations for user 0, then asks a ``RecommenderDeployment``
        wrapping the same model for user 0's recommendations given that
        user's dense row of ``test``. Both results must be equal.
        """
        model = AlternatingLeastSquares(factors=10, use_cg=False, iterations=3)
        model.fit(train)
        direct_recs = model.recommend_for_user(0, test)

        deployed = RecommenderDeployment(estimator=model)
        # The deployment is handed only user 0's ratings as a dense 1-d
        # vector rather than the whole sparse matrix.
        user_row = test[0, :].toarray()[0]
        deployed_recs = deployed.recommend_for_user(0, user_row)

        assert_array_equal(direct_recs, deployed_recs)

    def test_encoded_deployment(self):
        users = ['adam', 'betty', 'betty', 'frank', 'frank']
        items = ["chili's", "chuy's", "chili's", "torchy's", "chuy's"]
# Pull the raw fields off the loaded dataset object.
# NOTE(review): `artists` is not referenced again in the lines shown here.
items = lastfm.products
ratings = lastfm.ratings
artists = lastfm.artists

# We need to encode the users/items. If you use as_sparse=True, they come
# pre-encoded, but we do it manually here for the sake of the example.
# NOTE(review): `users` is not assigned in the visible portion of this
# script -- presumably it is defined just above; confirm.
user_le = LabelEncoder()
item_le = LabelEncoder()
users_transformed = user_le.fit_transform(users)
items_transformed = item_le.fit_transform(items)

# Split the data: build a float32 sparse matrix from the encoded users/items
# and the ratings, then split it with a fixed seed.
# NOTE(review): the meaning of `axis=0` in `to_sparse_csr` is not visible
# here -- check the helper's documentation before changing it.
X = to_sparse_csr(u=users_transformed,
                  i=items_transformed,
                  r=ratings, axis=0, dtype=np.float32)
train, test = train_test_split(X, train_size=0.75, random_state=42)

# #############################################################################
# Fit our model, make our deployment object
# The model is fit on the training split only; random_state is fixed.
als = AlternatingLeastSquares(
    random_state=42, use_gpu=False, use_cg=True,
    iterations=50, factors=100)
als.fit(train)

# This is what you'd persist: the fitted estimator bundled with the encoders
# that were fit on the raw user/item labels.
# NOTE(review): user_missing_strategy="error" presumably makes the wrapper
# raise for users it has not seen -- confirm against RecommenderDeployment.
wrapper = RecommenderDeployment(
    estimator=als, user_missing_strategy="error",

    # These are optional, and can be None if you don't want transformed recs
    item_encoder=item_le, user_encoder=user_le)