Пример #1
0
    def test_simple_fit(self):
        """Check seeded initialization is replicable, then fit one estimator.

        Two identically configured estimators are created and the factor
        matrices produced by ``_make_estimator`` are compared before any
        fitting takes place. Only the first estimator is subsequently fit;
        its reported user and item counts must match the shape of ``train``.
        """
        log = logging.getLogger("ALS_test_simple_fit")

        # Both estimators share exactly the same configuration and seed
        als_params = dict(random_state=1, use_gpu=False, use_cg=True,
                          iterations=5)

        log.debug("\nPre-instantiate clf1")
        first = AlternatingLeastSquares(**als_params)

        log.debug("Pre-instantiate clf2")
        second = AlternatingLeastSquares(**als_params)

        # PRE-FIT: given the same random seed, _make_estimator should
        # initialize the factor matrices identically for both estimators.
        log.debug("Making estimator with clf1")
        first_est = first._make_estimator(train)
        log.debug("Making estimator with clf2")
        second_est = second._make_estimator(train)
        for factor_name in ('item_factors', 'user_factors'):
            left = getattr(first_est, factor_name)
            right = getattr(second_est, factor_name)
            assert_array_almost_equal(left, right)

        # POST-FIT: only the first estimator is fit here; no post-fit
        # comparison between the two estimators is made (the original note
        # says fitted factors would only match when use_cg is False).
        log.debug("Fitting first estimator")
        first.fit(train)

        # The fitted model must report the dimensions of the training matrix
        n_rows, n_cols = train.shape
        assert first.n_items() == n_cols
        assert first.n_users() == n_rows
Пример #2
0
    def test_simple_deployment(self):
        """A deployment with no encoders must agree with the raw estimator.

        Recommendations for user 0 obtained directly from the fitted
        estimator are compared with those produced by a
        ``RecommenderDeployment`` that wraps the same estimator and is given
        the user's dense ratings vector.
        """
        model = AlternatingLeastSquares(factors=10, use_cg=False, iterations=3)
        model.fit(train)
        expected = model.recommend_for_user(0, test)

        wrapper = RecommenderDeployment(estimator=model)

        # The deployment is handed a single dense row rather than the matrix
        user_row = test[0, :].toarray()[0]
        actual = wrapper.recommend_for_user(0, user_row)
        assert_array_equal(expected, actual)
Пример #3
0
    def test_recommend_single(self):
        """Run the single-user recommendation checks against a fitted ALS.

        Besides the shared assertions, this verifies that asking for more
        recommendations than there are items yields exactly ``n_items``
        results when previously rated items are not filtered out.
        """
        als_params = dict(random_state=1, use_gpu=False, use_cg=True,
                          iterations=5)
        model = AlternatingLeastSquares(**als_params)
        model.fit(train)

        # Shared assertions on the recommendations
        self._single_recommend_assertions(model, train, test)

        # ALS-specific: requesting n > n_items should simply cap the result
        # at n_items.
        total_items = test.shape[1]
        too_many = total_items + 5
        recs = model.recommend_for_user(
            0, test, n=too_many, filter_previously_rated=False)
        n_returned = len(recs)
        assert n_returned == total_items, len(recs)
Пример #4
0
    def test_random_cv_fit_recommend(self):
        """Fit a K-fold randomized search and check for the expected warning.

        The search is configured with ``recommend_params`` containing the
        ``"filter_previously_rated"`` key; fitting is expected to emit a
        warning that mentions that key.
        """
        # Create the estimator
        clf = AlternatingLeastSquares(random_state=42, use_cg=True,
                                      iterations=5, factors=15)

        # These are the hyper parameters we'll use
        hyper = {
            'factors': randint(5, 6),
            'regularization': uniform(0.01, 0.05)
        }

        # Make our cv
        cv = KFold(n_splits=2, random_state=1, shuffle=True)
        search = RandomizedRecommenderSearchCV(
            estimator=clf, cv=cv, random_state=42,
            param_distributions=hyper, n_jobs=1,
            n_iter=2, recommend_params={"filter_previously_rated": True},
            verbose=1, scoring='ndcg')

        # While we're fitting, assert we get a warning about the
        # "filter_previously_rated" key in the fit params...
        with warnings.catch_warnings(record=True) as w:
            # Record every warning. Under the default filters a warning that
            # was already raised earlier in the process may be suppressed
            # and never recorded, which would make this test flaky.
            warnings.simplefilter("always")

            self._search_fit_assert(search)  # should warn in fit

            # Verify...
            assert len(w)
            assert any("filter_previously_rated" in str(warn.message)
                       for warn in w)
Пример #5
0
    def test_recommend_all(self):
        """Run the all-users recommendation checks against a fitted ALS."""
        als_params = dict(random_state=1, use_gpu=False, use_cg=True,
                          iterations=5)
        model = AlternatingLeastSquares(**als_params)

        # Use whatever fit() returns, exactly as the chained call would
        fitted = model.fit(train)

        # Shared assertions for recommending to every user
        self._all_recommend_assertions(fitted, test)
Пример #6
0
    def test_random_val_fit(self):
        """Fit a randomized search that scores on a validation set.

        No cross-validation splitter is supplied (``cv=None``); ``test`` is
        passed to the shared fit assertion as the validation data instead.
        """
        # Base estimator whose hyper-parameters will be searched
        base_model = AlternatingLeastSquares(random_state=42, use_cg=True,
                                             iterations=5, factors=10)

        # Distributions to sample the hyper-parameters from
        param_space = dict(
            factors=randint(5, 6),
            regularization=uniform(0.01, 0.05),
        )

        # No CV here: the validation set is handed over at fit time
        searcher = RandomizedRecommenderSearchCV(
            estimator=base_model,
            cv=None,
            random_state=42,
            param_distributions=param_space,
            n_jobs=1,
            n_iter=2,
            verbose=1)

        self._search_fit_assert(searcher, val=test)
Пример #7
0
    def test_encoded_deployment(self):
        """Exercise ``RecommenderDeployment`` with and without label encoders.

        Covers, in order: constructor validation, recommending with both
        encoders (array input, dict input, and with scores), a persistence
        round trip through ``joblib``, behavior after removing the user
        encoder and then the item encoder, the errors raised for bad user IDs
        or ratings of the wrong length, and the ``"warn"`` missing-user
        strategy.
        """
        # Local imports: only this test needs a scratch directory.
        import shutil
        import tempfile

        users = ['adam', 'betty', 'betty', 'frank', 'frank']
        items = ["chili's", "chuy's", "chili's", "torchy's", "chuy's"]
        visits = [2, 4, 1, 8, 5]

        # Encode the labels
        user_le = LabelEncoder()
        item_le = LabelEncoder()
        users = user_le.fit_transform(users)
        items = item_le.fit_transform(items)

        # Make the matrix (don't bother splitting for this example)
        R = sparse.csr_matrix((visits, (users, items)), shape=(3, 3))
        als = AlternatingLeastSquares(factors=2, use_cg=False, iterations=5)
        als.fit(R)
        recs1 = als.recommend_for_user(0, R)

        def first_user_ratings():
            # Build a fresh dense copy of row 0 on every call so that each
            # recommendation request gets its own, untouched ratings vector.
            return R[0, :].toarray()[0]

        # Test failing constructors first
        with pytest.raises(TypeError):
            RecommenderDeployment(estimator=als,
                                  item_encoder='bad_encoder',
                                  user_encoder=user_le,
                                  user_missing_strategy='error')
        with pytest.raises(TypeError):
            RecommenderDeployment(estimator=als,
                                  item_encoder=item_le,
                                  user_encoder='bad_encoder',
                                  user_missing_strategy='error')
        with pytest.raises(TypeError):
            RecommenderDeployment(estimator=als,
                                  item_encoder=item_le,
                                  user_encoder=user_le,
                                  filter_items='non-iterable',
                                  user_missing_strategy='error')
        with pytest.raises(ValueError):
            RecommenderDeployment(estimator=als,
                                  item_encoder=item_le,
                                  user_encoder=user_le,
                                  user_missing_strategy='bad-strategy')

        # "deploy" with both encoders
        deployment = RecommenderDeployment(estimator=als,
                                           item_encoder=item_le,
                                           user_encoder=user_le,
                                           user_missing_strategy='error')
        recs2 = deployment.recommend_for_user('adam', first_user_ratings())

        # Show that the encoded recs are the same as before
        assert_array_equal(recs1, item_le.transform(recs2))

        # What if we pass a dict?
        recs3 = deployment.recommend_for_user('adam', {"chili's": 2})
        assert_array_equal(recs1, item_le.transform(recs3))

        # And if we want scores?
        recs4, scores = deployment.recommend_for_user('adam',
                                                      first_user_ratings(),
                                                      return_scores=True)
        assert_array_equal(recs1, item_le.transform(recs4))
        assert scores.shape[0] == recs4.shape[0]

        # Test the persistence model. The pickle goes into a private scratch
        # directory so the test never clobbers a "model.pkl" in the working
        # directory, and cleanup cannot mask a failure of joblib.dump with a
        # FileNotFoundError.
        tmp_dir = tempfile.mkdtemp()
        pkl_location = os.path.join(tmp_dir, "model.pkl")
        try:
            joblib.dump(deployment, pkl_location, compress=3)
            loaded = joblib.load(pkl_location)
            recs5 = loaded.recommend_for_user('adam', first_user_ratings())
            assert_array_equal(recs1, item_le.transform(recs5))
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

        # If we set the user_encoder to None, show we get the same
        # recommendations with a non-encoded user ID
        deployment.user_encoder = None
        recs_no_encode = deployment.recommend_for_user(0, first_user_ratings())
        assert_array_equal(recs1, item_le.transform(recs_no_encode))

        # Oh, and now we fail with a TypeError if we pass a string since it
        # never gets transformed
        with pytest.raises(TypeError):
            deployment.recommend_for_user('adam', first_user_ratings())

        # What if we give it a user that doesn't exist? Or a negative one?
        with pytest.raises(KeyError):
            deployment.recommend_for_user(9, first_user_ratings())
        with pytest.raises(KeyError):
            deployment.recommend_for_user(-1, first_user_ratings())

        # Show we fail with improper dims
        with pytest.raises(ValueError):
            deployment.recommend_for_user(0, [2.])

        # Now set the item encoder to none
        deployment.item_encoder = None
        recs_no_encode_anything = deployment.recommend_for_user(0, {0: 2})
        assert_array_equal(recs1, recs_no_encode_anything)

        # Set it to "warn" and try again
        deployment.user_missing_strategy = "warn"
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")

            # execute the fxn
            recs = deployment.recommend_for_user(9, first_user_ratings())
            assert len(w)  # assert there's something there...
            assert recs.shape[0] == 0

            # do the same with return_scores
            recs, scores = deployment.recommend_for_user(9,
                                                         first_user_ratings(),
                                                         return_scores=True)
            assert recs.shape[0] == scores.shape[0] == 0
# NOTE(review): this excerpt starts mid-example; `users`, `items` and
# `ratings` are presumably defined above the visible portion -- confirm.
# The IDs may already be pre-encoded, but we encode them manually here for
# the sake of the example.
user_le = LabelEncoder()
item_le = LabelEncoder()
users_transformed = user_le.fit_transform(users)
items_transformed = item_le.fit_transform(items)

# Build a sparse matrix from the encoded users/items and the ratings, then
# split it into train and test portions (train_size=0.75).
X = to_sparse_csr(u=users_transformed,
                  i=items_transformed,
                  r=ratings, axis=0, dtype=np.float32)
train, test = train_test_split(X, train_size=0.75, random_state=42)

# #############################################################################
# Fit our model on the training split, then make our deployment object
als = AlternatingLeastSquares(
    random_state=42, use_gpu=False, use_cg=True,
    iterations=50, factors=100)
als.fit(train)

# This is what you'd persist: the fitted estimator wrapped together with the
# fitted label encoders.
wrapper = RecommenderDeployment(
    estimator=als, user_missing_strategy="error",

    # These are optional, and can be None if you don't want transformed recs
    item_encoder=item_le, user_encoder=user_le)

# #############################################################################
# Generate predictions for a fan of classic rock

def top_listener(of):
    musician_id = [i for i, v in artists.items() if v == of][0]
Пример #9
0
    def test_serialize(self):
        """Run the shared serialization assertions on an unfitted ALS."""
        als_params = dict(random_state=1, use_gpu=False, use_cg=True,
                          iterations=5)
        model = AlternatingLeastSquares(**als_params)

        # The shared helper receives the estimator plus both data splits
        self._serialization_assertions(model, train, test)
Пример #10
0
 def test_complex_fit(self):
     """Show we can fit a really complex model.

     The estimator is configured with many factors and more iterations
     than the other tests, then actually fit on ``train``; previously it
     was only constructed, so nothing was verified.
     """
     clf = AlternatingLeastSquares(random_state=42, use_cg=True, iterations=15,
                                   factors=150, regularization=0.01,
                                   num_threads=1)
     clf.fit(train)

     # The fitted model must report the dimensions of the training matrix
     assert clf.n_users() == train.shape[0]
     assert clf.n_items() == train.shape[1]
Пример #11
0
from reclab.collab import AlternatingLeastSquares as ALS
import numpy as np

# #############################################################################
# Load data and split into train/test
# NOTE(review): `load_lastfm` and `train_test_split` are not imported in the
# visible portion of this example -- presumably imported above; confirm.
lastfm = load_lastfm(cache=True, as_sparse=True)
train, test = train_test_split(lastfm.ratings, random_state=42)

# Show a summary (repr) of each split
print("Train:")
print(repr(train))
print("\nTest:")
print(repr(test))

# #############################################################################
# Fit our model
als = ALS(random_state=1, use_gpu=False, use_cg=True,
          iterations=25, factors=100)
als.fit(train)

# #############################################################################
# Generate predictions (on the test set) for a user who is a metal head like me
artists = lastfm.artists

# Column index of the artist named "Mayhem" (first match)
mayhem_id = np.where(artists == "Mayhem")[0][0]

# Dense, flattened copy of that artist's column of the training matrix
mayhem_listens = train[:, mayhem_id].toarray().ravel()

# Row indices ordered by descending value in that column
mayhem_listeners = np.argsort(-mayhem_listens)
mayhem_appreciator = mayhem_listeners[0]  # Has the best taste in music :)

# Report the user's value for Mayhem and the five artists with the largest
# values in that user's training row (argsort of the negated dense row).
print("\nUser #%i listened to Mayhem %i times.\nThis user's top 5 "
      "most-listened-to artists are:\n%s"
      % (mayhem_appreciator, int(train[mayhem_appreciator, mayhem_id]),
         str(artists[np.argsort(
             -train[mayhem_appreciator, :].toarray())][0, :5])))