# Example no. 1
0
class TFIDFModelTest(unittest.TestCase):
    """Unit tests for ``TFIDFModel`` run against the ``DummyDataset`` fixture.

    The name-mangled private helpers (``__convert_tocsr`` and
    ``__extract_facts``) are reached through their mangled
    ``_TFIDFModel__*`` names so they can be exercised in isolation.
    """

    def setUp(self):
        """Create a fresh dataset and a non-verbose model for every test."""
        self.data = DummyDataset()
        self.model = TFIDFModel(verbose=False)

    def test_tocsr(self):
        """Item/tag data converts to the expected term-frequency matrix."""
        TF_expected = [[1, 1, 1, 1, 1, 1],
                       [1, 0, 4, 0, 1, 0],
                       [1, 0, 2, 0, 2, 1],
                       [1, 0, 0, 3, 2, 0],
                       [1, 0, 0, 4, 0, 1]]
        TF = self.model._TFIDFModel__convert_tocsr(self.data.item_tags)
        # assert_array_equal reports the mismatching elements on failure,
        # whereas assertTrue(np.array_equal(...)) only says "False is not true".
        np.testing.assert_array_equal(TF.todense(), TF_expected)

    def test_extract(self):
        """Fact extraction without a threshold yields a 0/1 matrix."""
        DF_expected = [[1, 1, 1, 1, 1, 1],
                       [1, 0, 1, 0, 1, 0],
                       [1, 0, 1, 0, 1, 1],
                       [1, 0, 0, 1, 1, 0],
                       [1, 0, 0, 1, 0, 1]]
        DF = self.model._TFIDFModel__extract_facts(self.data.item_tags)
        np.testing.assert_array_equal(DF.todense(), DF_expected)

    def test_extract_threshold(self):
        """Preference extraction from ratings with a threshold of 3.5."""
        P_expected = [[1, 0, 0, 1, 0],
                      [1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1]]
        P = self.model._TFIDFModel__extract_facts(self.data.ratings, 3.5)
        np.testing.assert_array_equal(P.todense(), P_expected)

    def test_tfidf_profiles(self):
        """Building the model produces the expected TF-IDF item profiles."""
        I_expected = np.matrix([[ 0.,          0.86991409,  0.27610534,  0.27610534,  0.12061088,  0.27610534],
                                [ 0.,          0.        ,  0.9940897 ,  0.        ,  0.10856185,  0.        ],
                                [ 0.,          0.        ,  0.83309624,  0.        ,  0.36392077,  0.41654812],
                                [ 0.,          0.        ,  0.        ,  0.96011533,  0.27960428,  0.        ],
                                [ 0.,          0.        ,  0.        ,  0.9701425 ,  0.        ,  0.24253563]])
        self.model.build(self.data)
        # Both sides go through stringify_matrix (defined elsewhere) before
        # comparison; presumably this sidesteps floating-point noise in the
        # computed profiles -- confirm against that helper.
        # assertEqual shows both values when they differ.
        self.assertEqual(stringify_matrix(self.model.I().todense()),
                         stringify_matrix(I_expected))