Пример #1
0
    def test_cg_nan2(self):
        # Regression test for NaN values appearing in the CG code
        # (from https://github.com/benfred/implicit/issues/106).
        #
        # A very sparse 100x100 float32 matrix is fitted with every available
        # solver configuration, and both factor matrices must contain only
        # finite values afterwards.
        # NOTE(review): `random` is presumably scipy.sparse.random (the keyword
        # arguments match its signature) - confirm against the file's imports.
        Ciu = random(m=100,
                     n=100,
                     density=0.0005,
                     format='coo',
                     dtype=np.float32,
                     random_state=42,
                     data_rvs=None).T.tocsr()

        # CPU configurations are always exercised; the GPU one only when CUDA
        # support was detected.
        configs = [{
            'use_native': True,
            'use_gpu': False
        }, {
            'use_native': False,
            'use_gpu': False
        }]
        if HAS_CUDA:
            configs.append({'use_gpu': True})

        for options in configs:
            model = AlternatingLeastSquares(factors=32,
                                            regularization=10,
                                            iterations=10,
                                            dtype=np.float32,
                                            **options)
            model.show_progress = False
            model.fit(Ciu)

            # Name the configuration in the failure message: several configs
            # share this loop, so a bare assertion would not say which one
            # produced the non-finite values.
            self.assertTrue(np.isfinite(model.item_factors).all(),
                            msg="non-finite item factors with options=%s"
                                % (options,))
            self.assertTrue(np.isfinite(model.user_factors).all(),
                            msg="non-finite user factors with options=%s"
                                % (options,))
Пример #2
0
    def test_cg_nan(self):
        # Regression test for an issue with the CG code that was causing NaN
        # values in the output:
        # https://github.com/benfred/implicit/issues/19#issuecomment-283164905
        #
        # The 12x12 input below contains two all-zero rows (indices 5 and 11)
        # and two all-zero columns (indices 0 and 6).
        raw = [[0.0, 2.0, 1.5, 1.33333333, 1.25, 1.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25, 1.2],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333, 1.25],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5, 1.33333333],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.5],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0],
               [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
        counts = csr_matrix(raw, dtype=np.float64)

        # Exercise the CG solver with use_native both enabled and disabled.
        for use_native in (True, False):
            model = AlternatingLeastSquares(factors=3,
                                            regularization=0.01,
                                            dtype=np.float64,
                                            use_native=use_native,
                                            use_cg=True)
            model.show_progress = False
            model.fit(counts)

            # A single NaN anywhere makes the total NaN, so checking the sum
            # covers every entry. The message names the variant because both
            # variants share these assertions.
            self.assertFalse(np.isnan(np.sum(model.user_factors)),
                             msg="NaN in user factors with use_native=%s"
                                 % use_native)
            self.assertFalse(np.isnan(np.sum(model.item_factors)),
                             msg="NaN in item factors with use_native=%s"
                                 % use_native)
Пример #3
0
    def test_explain(self):
        # Checks two things on a small 7x6 matrix:
        #   1. recommend() yields the same items and scores (to 4 places)
        #      whether or not recalculate_user=True is passed;
        #   2. explain() returns per-item contributions that add up to the
        #      recommendation score, are sorted in descending order, and can
        #      be truncated with N while reusing the returned weights.
        #
        # NOTE(review): the matrix named `user_items` is the one passed to
        # fit(), while its transpose `item_users` is passed to recommend() and
        # explain() - confirm the names match the orientation the API expects.
        counts = csr_matrix(
            [[1, 1, 0, 1, 0, 0],
             [0, 1, 1, 1, 0, 0],
             [1, 4, 1, 0, 7, 0],
             [1, 1, 0, 0, 0, 0],
             [9, 0, 4, 1, 0, 1],
             [0, 1, 0, 0, 0, 1],
             [0, 0, 2, 0, 1, 1]],
            dtype=np.float64)
        user_items = counts * 2
        item_users = user_items.T

        model = AlternatingLeastSquares(factors=4,
                                        regularization=20,
                                        use_native=False,
                                        use_cg=False,
                                        iterations=100)
        model.show_progress = False
        # Seed numpy's global RNG right before fitting.
        np.random.seed(23)
        model.fit(user_items)

        userid = 0

        # The recommendations must be the same when the user vector is
        # recomputed on the fly.
        stored_recs = model.recommend(userid, item_users, N=10)
        recomputed_recs = model.recommend(userid,
                                          item_users,
                                          N=10,
                                          recalculate_user=True)
        for (stored_item, stored_score), (new_item, new_score) in zip(
                stored_recs, recomputed_recs):
            self.assertEqual(stored_item, new_item)
            self.assertAlmostEqual(stored_score, new_score, 4)

        # The explanation of the top recommendation must make sense.
        best_item, best_score = recomputed_recs[0]
        explained_score, contributions, user_weights = model.explain(
            userid, item_users, itemid=best_item)
        contributing_items = [item for item, _ in contributions]
        contribution_scores = [weight for _, weight in contributions]
        self.assertAlmostEqual(best_score, explained_score, 4)
        self.assertAlmostEqual(best_score, sum(contribution_scores), 4)
        self.assertEqual(contribution_scores,
                         sorted(contribution_scores, reverse=True),
                         "Scores not in order")
        self.assertEqual([0, 2, 3, 4], sorted(contributing_items),
                         "Items not seen by user")

        # Explaining again with the precomputed user weights and N=2 must
        # return exactly the first two contributions from above.
        top_explained_score, top_contributions, _ = model.explain(
            userid,
            item_users,
            itemid=best_item,
            user_weights=user_weights,
            N=2)
        top_items = [item for item, _ in top_contributions]
        top_scores = [weight for _, weight in top_contributions]
        self.assertEqual(2, len(top_contributions))
        self.assertAlmostEqual(best_score, top_score_explained
                               if False else top_explained_score, 4)
        self.assertEqual(contribution_scores[:2], top_scores)
        self.assertEqual(contributing_items[:2], top_items)
Пример #4
0
    def test_factorize(self):
        # Fits the model (regularization=0, factors=6) on `counts * 2` for
        # every solver variant and checks that the product of the learned
        # factor matrices matches `counts` itself to within 0.0001 per cell.
        counts = csr_matrix([[1, 1, 0, 1, 0, 0],
                             [0, 1, 1, 1, 0, 0],
                             [1, 0, 1, 0, 0, 0],
                             [1, 1, 0, 0, 0, 0],
                             [0, 0, 1, 1, 0, 1],
                             [0, 1, 0, 0, 0, 1],
                             [0, 0, 0, 0, 1, 1]], dtype=np.float64)
        user_items = counts * 2

        # Each variant is (dtype, use_cg, use_native, use_gpu). The CPU
        # variants cover all 8 combinations of 32/64 bit factors,
        # cholesky/cg and python/native.
        options = []
        for factor_dtype in (np.float32, np.float64):
            for cg in (False, True):
                for native in (False, True):
                    options.append((factor_dtype, cg, native, False))

        # Also try out GPU support if available.
        if HAS_CUDA:
            options.append((np.float32, False, False, True))

        n_rows, n_cols = counts.shape
        for dtype, use_cg, use_native, use_gpu in options:
            try:
                model = AlternatingLeastSquares(factors=6,
                                                regularization=0,
                                                dtype=dtype,
                                                use_native=use_native,
                                                use_cg=use_cg,
                                                use_gpu=use_gpu)
                np.random.seed(23)
                model.show_progress = False
                model.fit(user_items)
                item_factors = model.item_factors
                user_factors = model.user_factors
            except Exception as e:
                # Turn any error raised while building or fitting the model
                # into a test failure that names the variant.
                self.fail(msg="failed to factorize matrix. Error=%s"
                              " dtype=%s, cg=%s, native=%s gpu=%s"
                              % (e, dtype, use_cg, use_native, use_gpu))
            else:
                reconstructed = item_factors.dot(user_factors.T)
                for row in range(n_rows):
                    for col in range(n_cols):
                        value = reconstructed[row, col]
                        self.assertAlmostEqual(
                            counts[row, col], value,
                            delta=0.0001,
                            msg="failed to reconstruct row=%s, col=%s,"
                                " value=%.5f, dtype=%s, cg=%s, native=%s gpu=%s"
                                % (row, col, value, dtype, use_cg,
                                   use_native, use_gpu))