Exemplo n.º 1
0
    def receive_feedbacks(self, session, feedbacks):
        """
        Propagate the feedbacks given by the user, then update the
        feedback values.

        The format of feedback is:
        {
        "docs": [[doc_id, feedback_value], ...],
        "kws": [[keyword_id, feedback_value], ...],
        "dockws": [[keyword_id, doc_id, feedback_value], ...]
        }
        Each of the three keys is optional; a missing key is treated as an
        empty list.

        session: handed unchanged to the propagator (self.ppgt) and the
                 updater (self.upd)
        feedbacks: dict in the format above
        """
        # A parenthesised single string prints the same text under the
        # Python 2 print statement and is also valid Python 3 syntax.
        print("propagation started...")

        for doc_id, fb in feedbacks.get("docs", []):
            self.ppgt.fb_from_doc(Document.get(doc_id), fb, session)

        for kw_id, fb in feedbacks.get("kws", []):
            self.ppgt.fb_from_kw(Keyword.get(kw_id), fb, session)

        for kw_id, doc_id, fb in feedbacks.get("dockws", []):
            # entries are [keyword_id, doc_id, value]; the document is
            # fetched before the keyword
            doc = Document.get(doc_id)
            kw = Keyword.get(kw_id)
            self.ppgt.fb_from_dockw(kw, doc, fb, session)

        # propagation is done
        # updates the feedback value
        self.upd.update(session)
        print("propagation finished")
Exemplo n.º 2
0
    def test_similarity(self):
        """
        similarity_to between two documents and between two keywords:
        expected values, same value in both directions, and
        NotImplementedError when "not implemented metric" is passed as
        the second argument.
        """
        unknown_metric = "not implemented metric"

        # documents
        first_doc = Document.get(1)
        second_doc = Document.get(2)
        third_doc = Document.get(3)

        self.assertAlmostEqual(0.6300877890447911,
                               first_doc.similarity_to(second_doc))
        # both directions must agree
        self.assertAlmostEqual(second_doc.similarity_to(first_doc),
                               first_doc.similarity_to(second_doc))
        self.assertAlmostEqual(0.31713642199844894,
                               first_doc.similarity_to(third_doc))
        self.assertRaises(NotImplementedError, first_doc.similarity_to,
                          third_doc, unknown_metric)

        # keywords
        redis_kw = Keyword.get("redis")
        database_kw = Keyword.get("database")
        python_kw = Keyword.get("python")

        self.assertAlmostEqual(0.6698544675330306,
                               redis_kw.similarity_to(database_kw))
        # both directions must agree
        self.assertAlmostEqual(database_kw.similarity_to(redis_kw),
                               redis_kw.similarity_to(database_kw))
        self.assertAlmostEqual(0.2613424459663648,
                               redis_kw.similarity_to(python_kw))
        self.assertRaises(NotImplementedError, redis_kw.similarity_to,
                          python_kw, unknown_metric)
Exemplo n.º 3
0
 def recommend_keywords(self, fmim,
                        session, top_n, mu, c,
                        sampler=None):
     """
     Rank the keywords of the sub-matrix and return the first top_n.

     fmim: FeatureMatrixAndIndexMapping, the fmim for the sub-matrix
     session: Session,
     top_n: how many kws are returned
     mu,c: float, the parameters for LinRel algorithm
     sampler: accepted for interface compatibility, not used here

     Return
     list of Keyword: each one has its 'score' set to the ranked score
     and 'recommended' set to True
     """
     # current feedback of every keyword indexed by the sub-matrix
     candidates = Keyword.get_many(list(fmim.kw_ind.keys()))
     fbs = dict((kw.id, kw.fb(session)) for kw in candidates)

     # generic_rank returns three mappings; only the first is used here
     id_with_scores, _explt_scores, _explr_scores = self.generic_rank(
         fmim.kw2doc_m, fbs,
         fmim.kw_ind, fmim.kw_ind_r,
         mu, c)

     # list(...) keeps the slice working when items() returns a view
     # instead of a list.
     # NOTE(review): the first top_n items are taken in the mapping's own
     # iteration order - presumably already sorted by score; confirm
     # against generic_rank.
     top_items = list(id_with_scores.items())[:top_n]

     kws = []
     for kw_id, score in top_items:
         kw = Keyword.get(kw_id)
         kw['score'] = score
         kw['recommended'] = True
         kws.append(kw)

     return kws
Exemplo n.º 4
0
    def receive_feedbacks(self, session, feedbacks):
        """
        Receive feedbacks from user, propagate them and update the
        feedback values.

        The format of feedback is:
        {
        "docs": [[doc_id, feedback_value], ...],
        "kws": [[keyword_id, feedback_value], ...],
        "dockws": [[keyword_id, doc_id, feedback_value], ...]
        }
        A key that is absent is handled like an empty list.

        session: passed through to self.ppgt and self.upd
        feedbacks: dict in the format above
        """
        # print(...) with one string argument gives the same output as the
        # Python 2 print statement and also parses on Python 3.
        print("propagation started...")

        for doc_entry in feedbacks.get("docs", []):
            doc_id, value = doc_entry
            self.ppgt.fb_from_doc(Document.get(doc_id), value, session)

        for kw_entry in feedbacks.get("kws", []):
            kw_id, value = kw_entry
            self.ppgt.fb_from_kw(Keyword.get(kw_id), value, session)

        for dockw_entry in feedbacks.get("dockws", []):
            # keyword id comes first in the entry, but the document is
            # fetched first
            kw_id, doc_id, value = dockw_entry
            doc = Document.get(doc_id)
            kw = Keyword.get(kw_id)
            self.ppgt.fb_from_dockw(kw, doc, value, session)

        # propagation is done
        # updates the feedback value
        self.upd.update(session)
        print("propagation finished")
Exemplo n.º 5
0
    def setUp(self):
        """
        Create the recommender, a session holding some keyword/document
        feedback, and self.fmim built from the filtered keywords and
        documents.
        """
        keyword_filters = [self.my_kw_filter]
        document_filters = [self.kw_count_filter, self.has_database_filter]

        # the default configuration: no keyword filters, two document filters
        self.r = LinRelRecommender(
            2, 2,
            1.0, 0.1, 1.0, 0.1,
            kw_filters=None,
            doc_filters=[self.kw_count_filter, self.has_database_filter],
            **fmim.__dict__)

        self.session = get_session()

        for kw_id, value in (("redis", .7), ("database", .6)):
            self.session.update_kw_feedback(Keyword.get(kw_id), value)

        for doc_id in (1, 2, 8):
            self.session.update_doc_feedback(Document.get(doc_id), .7)

        # make the fmim from the filtered objects
        kept_kws = self.r._filter_objs(keyword_filters, kws=Keyword.all_kws)
        kept_docs = self.r._filter_objs(document_filters,
                                        docs=Document.all_docs)

        kw2doc_sub, kw_index, kw_index_rev = self.r._submatrix_and_indexing(
            kept_kws, kept_docs, fmim.kw2doc_m, fmim.kw_ind, fmim.doc_ind)
        doc2kw_sub, doc_index, doc_index_rev = self.r._submatrix_and_indexing(
            kept_docs, kept_kws, fmim.doc2kw_m, fmim.doc_ind, fmim.kw_ind)

        self.fmim = FeatureMatrixAndIndexMapping(
            kw_index, doc_index,
            kw2doc_sub, doc2kw_sub,
            kw_index_rev, doc_index_rev)
Exemplo n.º 6
0
    def test_get_many(self):
        """
        get_many called with a literal id list equals get_many called with
        a variable holding the same ids, for documents and keywords.
        """
        wanted_doc_ids = [1, 2]
        wanted_kw_ids = ["a", "the"]

        docs_from_literal = Document.get_many([1, 2])
        docs_from_variable = Document.get_many(wanted_doc_ids)
        self.assertEqual(docs_from_literal, docs_from_variable)

        kws_from_literal = Keyword.get_many(["a", "the"])
        kws_from_variable = Keyword.get_many(wanted_kw_ids)
        self.assertEqual(kws_from_literal, kws_from_variable)
Exemplo n.º 7
0
    def test_kw_fb_threshold_filter_with_prefiltering(self):
        """
        With threshold 0.1 and with_fb=True the filter is expected to give
        back only "python" (feedback .2); "a" (feedback .0999999) is not
        expected.
        """
        # change the feedback
        for kw_id, value in (("python", .2), ("a", .0999999)):
            self.session.update_kw_feedback(Keyword.get(kw_id), value)

        filtered = kw_fb_threshold_filter(0.1, self.session, with_fb=True)

        self.assertEqual(Keyword.get_many(["python"]), filtered)
Exemplo n.º 8
0
    def test_add_kws(self):
        """
        Every list given to add_kw_recom_list shows up, in call order, in
        session.recom_kws.
        """
        first_batch = Keyword.get_many(["redis", "database", "mysql"])
        second_batch = Keyword.get_many(["redis", "database", "python"])

        added_so_far = []
        for batch in (first_batch, second_batch):
            self.session.add_kw_recom_list(batch)
            added_so_far.append(batch)
            self.assertEqual(list(added_so_far), self.session.recom_kws)
Exemplo n.º 9
0
    def test_one(self):
        """
        Three rounds of documents [1,2]/[1,2]/[2,1] and keywords
        redis+database are expected to evaluate to 1 in every position of
        both returned sequences.
        """
        doc_rounds = [Document.get_many(ids)
                      for ids in ([1, 2], [1, 2], [2, 1])]
        kw_rounds = [Keyword.get_many(["redis", "database"])
                     for _ in range(3)]

        result = self.e.evaluate(doc_rounds, kw_rounds)

        all_ones = [1, 1, 1]
        self.assertArrayAlmostEqual(all_ones, result[0])
        self.assertArrayAlmostEqual(all_ones, result[1])
Exemplo n.º 10
0
    def test_get_many(self):
        """
        For Document and Keyword alike, get_many on a freshly built id
        list must equal get_many on a stored list with the same ids.
        """
        stored_doc_ids = [1, 2]
        stored_kw_ids = ["a", "the"]

        cases = ((Document, stored_doc_ids), (Keyword, stored_kw_ids))
        for model, stored_ids in cases:
            # list(...) builds a new, equal list, like a literal would
            self.assertEqual(model.get_many(list(stored_ids)),
                             model.get_many(stored_ids))
Exemplo n.º 11
0
    def test_kw_fb_threshold_filter(self):
        """
        Called over Keyword.all_kws with with_fb=False and threshold 0.1,
        the filter is expected to return only "python".
        """
        # change the feedback
        python_kw = Keyword.get("python")
        self.session.update_kw_feedback(python_kw, .2)
        a_kw = Keyword.get("a")
        self.session.update_kw_feedback(a_kw, .0999999)

        threshold = 0.1
        survivors = kw_fb_threshold_filter(threshold, self.session,
                                           kws=Keyword.all_kws,
                                           with_fb=False)
        only_python = Keyword.get_many(["python"])

        self.assertEqual(only_python, survivors)
Exemplo n.º 12
0
    def test_kw_fb_filter(self):
        """
        After "redis" has received feedback from document 1 and its session
        feedback has been set, the "kw_fb" filter is expected to return it.
        """
        redis_kw = Keyword.get("redis")
        source_doc = Document.get(1)
        redis_kw.rec_fb_from_doc(source_doc, 1, self.session)

        recommended_docs = Document.get_many([1, 2, 6])
        self.session.add_doc_recom_list(recommended_docs)

        weighted = redis_kw.fb_weighted_sum(self.session)
        self.session.update_kw_feedback(redis_kw, weighted)

        kw_fb_filter = FilterRepository.filters["kw_fb"]
        kept = kw_fb_filter([redis_kw])

        self.assertEqual(Keyword.get_many(["redis"]), kept)
Exemplo n.º 13
0
    def test_two(self):
        """
        Evaluate three rounds of documents and keywords and compare both
        returned sequences with precomputed values.
        """
        doc_rounds = [Document.get_many(ids)
                      for ids in ([8, 10], [3, 4], [2, 1])]
        kw_rounds = [Keyword.get_many(names)
                     for names in (["a", "the"],
                                   ["python", "database"],
                                   ["database", "redis"])]

        result = self.e.evaluate(doc_rounds, kw_rounds)

        expected_first = [0.34491169135422844, 0.1726882003112921, 1.0]
        expected_second = [0.4834283906452939, 0.759679156743632,
                           0.9999999999999999]

        self.assertArrayAlmostEqual(expected_first, result[0])
        self.assertArrayAlmostEqual(expected_second, result[1])
Exemplo n.º 14
0
    def test_kw_fb_filter(self):
        """
        The "kw_fb" filter applied to [redis] must give back redis once the
        keyword has feedback recorded in the session.
        """
        session = self.session
        target = Keyword.get("redis")

        # feedback of 1 coming from document 1
        target.rec_fb_from_doc(Document.get(1), 1, session)
        session.add_doc_recom_list(Document.get_many([1, 2, 6]))
        session.update_kw_feedback(target, target.fb_weighted_sum(session))

        result = FilterRepository.filters["kw_fb"]([target])
        wanted = Keyword.get_many(["redis"])

        self.assertEqual(wanted, result)
Exemplo n.º 15
0
 def test_associated_keywords_from_documents(self):
     """
     Keywords associated with documents 1 and 2, first without and then
     with an exclusion list containing "redis". Order is ignored.
     """
     found = self.r.associated_keywords_from_docs(Document.get_many([1, 2]))
     all_four = Keyword.get_many(["a", "database", "redis", "the"])
     self.assertEqual(set(all_four), set(found))

     excluded = [Keyword.get("redis")]
     found = self.r.associated_keywords_from_docs(Document.get_many([1, 2]),
                                                  excluded)
     without_redis = Keyword.get_many(["a", "database", "the"])
     self.assertEqual(set(without_redis), set(found))
Exemplo n.º 16
0
    def test_kw_hashable(self):
        """
        Keyword lists work as dict keys, and two lists with the same
        keywords in a different order address the same entry.
        """
        redis_a_the = Keyword.get_many(["redis", "a", "the"])
        a_the_redis = Keyword.get_many(["a", "the", "redis"])
        redis_a_python = Keyword.get_many(["redis", "a", "python"])

        table = {}
        # the second assignment overrides the first one
        keys_in_order = (redis_a_the, a_the_redis, redis_a_python)
        for value, key in enumerate(keys_in_order, 1):
            table[key] = value

        self.assertEqual({redis_a_the: 2, redis_a_python: 3}, table)
Exemplo n.º 17
0
    def test_sample_documents_associated_with_keywords(self):
        """
        normal case: two documents are sampled and each has "python" or
        "redis" among its keywords
        """
        query_kws = Keyword.get_many(["python", "redis"])
        sampled = self.r.sample_documents_associated_with_keywords(query_kws,
                                                                   2)

        self.assertEqual(2, len(sampled))

        wanted_names = ("python", "redis")
        for doc in sampled:
            self.assertTrue(any(Keyword.get(name) in doc.keywords
                                for name in wanted_names))
Exemplo n.º 18
0
    def test_kw_hashable(self):
        """
        Using keyword lists as dict keys: the same keywords in another
        order hit the same key, a different keyword set gets its own key.
        """
        original = Keyword.get_many(["redis", "a", "the"])
        reordered = Keyword.get_many(["a", "the", "redis"])
        different = Keyword.get_many(["redis", "a", "python"])

        mapping = {}
        assignments = ((original, 1),
                       (reordered, 2),  # override
                       (different, 3))
        for key, value in assignments:
            mapping[key] = value

        expected = {original: 2, different: 3}
        self.assertEqual(expected, mapping)
Exemplo n.º 19
0
    def test_associated_keywords_from_documents(self):
        """
        associated_keywords_from_docs for documents 1 and 2; the second
        call also passes a list of keywords to exclude.
        """
        # no exclusion
        associated = self.r.associated_keywords_from_docs(
            Document.get_many([1, 2]))
        expected = set(Keyword.get_many(["a", "database", "redis", "the"]))
        self.assertEqual(expected, set(associated))

        # "redis" excluded
        exclude_kws = [Keyword.get("redis")]
        associated = self.r.associated_keywords_from_docs(
            Document.get_many([1, 2]), exclude_kws)
        expected = set(Keyword.get_many(["a", "database", "the"]))
        self.assertEqual(expected, set(associated))
Exemplo n.º 20
0
    def test_affected_kws(self):
        """
        Keywords handed to add_affected_kws, several at once or one at a
        time, are reported by session.affected_kws.
        """
        initial = [Keyword.get(name) for name in ("python", "redis")]

        self.session.add_affected_kws(*initial)
        self.assertEqual(initial, self.session.affected_kws)

        extra = Keyword.get("a")
        everything = initial + [extra]
        self.session.add_affected_kws(extra)
        # order is not checked after the second call
        self.assertEqual(set(everything), set(self.session.affected_kws))
Exemplo n.º 21
0
    def test_model2modellist_similarity(self):
        """
        similarity_to between a single keyword/document and a list of
        keywords/documents, compared with precomputed values.
        """
        # keyword against keyword list
        single_kw = Keyword.get("redis")
        other_kws = Keyword.get_many(["database", "mysql"])
        kw_similarity = single_kw.similarity_to(other_kws)
        self.assertAlmostEqual(0.3754029265429976, kw_similarity)

        # document against document list
        single_doc = Document.get(6)
        other_docs = Document.get_many([1, 2])
        doc_similarity = single_doc.similarity_to(other_docs)
        self.assertAlmostEqual(0.7382455893131392, doc_similarity)
Exemplo n.º 22
0
    def test_model2modellist_similarity(self):
        """
        One model compared against a model list: "redis" against
        [database, mysql] and document 6 against documents [1, 2].
        """
        # for keywords
        redis_kw = Keyword.get("redis")
        self.assertAlmostEqual(
            0.3754029265429976,
            redis_kw.similarity_to(Keyword.get_many(["database", "mysql"])))

        # for documents
        sixth_doc = Document.get(6)
        self.assertAlmostEqual(
            0.7382455893131392,
            sixth_doc.similarity_to(Document.get_many([1, 2])))
Exemplo n.º 23
0
    def test_sample_documents_associated_with_keywords(self):
        """
        normal case: asking for 2 documents gives 2, and each of them
        carries the keyword "python" or the keyword "redis"
        """
        sample_size = 2
        docs = self.r.sample_documents_associated_with_keywords(
            Keyword.get_many(["python", "redis"]), sample_size)

        self.assertEqual(2, len(docs))

        for sampled_doc in docs:
            doc_kws = sampled_doc.keywords
            has_wanted_kw = (Keyword.get("python") in doc_kws
                             or Keyword.get("redis") in doc_kws)
            self.assertTrue(has_wanted_kw)
Exemplo n.º 24
0
    def test_fb_from_doc(self):
        """
        Propagate a feedback of 0.5 from document 1, run the updater, then
        check the feedback of the document and of three keywords.
        """
        source_doc = Document.get(1)
        ppgt.fb_from_doc(source_doc, 0.5, self.session)

        upd.update(self.session)

        # assertions
        self.assertAlmostEqual(.5 * .7, source_doc.fb(self.session))

        expected_kw_fb = (("a", 1/2.), ("redis", 1/4.), ("database", 1/4.))
        for kw_id, expected in expected_kw_fb:
            self.assertAlmostEqual(expected,
                                   Keyword.get(kw_id).fb(self.session))
Exemplo n.º 25
0
    def test_modellist2modellist_similarity(self):
        """
        similarity_to between two keyword lists and between two document
        lists, compared with precomputed values.
        """
        # for keywords
        left_kws = Keyword.get_many(["redis", "a"])
        right_kws = Keyword.get_many(["database", "the"])
        kw_similarity = left_kws.similarity_to(right_kws)
        self.assertAlmostEqual(0.42205423035497763, kw_similarity)

        # for documents
        left_docs = Document.get_many([3, 5])
        right_docs = Document.get_many([4, 6])
        doc_similarity = left_docs.similarity_to(right_docs)
        self.assertAlmostEqual(0.6990609119502719, doc_similarity)
Exemplo n.º 26
0
    def setUp(self):
        """
        Create the LinRelRecommender and a session that already holds
        feedback for two keywords and three documents.
        """
        self.r = LinRelRecommender(
            2, 2,
            1., .1, 1., .1,
            None, None,
            **fmim.__dict__)

        self.session = get_session()

        # giving the feedbacks
        keyword_feedback = (("redis", .7), ("database", .6))
        for kw_id, value in keyword_feedback:
            self.session.update_kw_feedback(Keyword.get(kw_id), value)

        for doc_id in (1, 2, 8):
            self.session.update_doc_feedback(Document.get(doc_id), .7)
Exemplo n.º 27
0
    def test_keyword_centroid(self):
        """
        The centroid of a one-keyword list equals that keyword's vector;
        the centroid of ["a", "the"] equals the mean of the two vectors.
        """
        # single keyword
        only_kw = Keyword.get("a")
        single_list = KeywordList([only_kw])
        single_centroid = matrix2array(single_list.centroid)
        self.assertArrayAlmostEqual(single_centroid,
                                    only_kw.vec.toarray()[0])

        # two keywords
        a_kw = Keyword.get("a")
        the_kw = Keyword.get("the")
        pair_list = Keyword.get_many(["a", "the"])

        pair_centroid = matrix2array(pair_list.centroid)
        mean_vec = (a_kw.vec.toarray()[0] + the_kw.vec.toarray()[0]) / 2
        self.assertArrayAlmostEqual(pair_centroid, mean_vec)
Exemplo n.º 28
0
    def test_rec_fb_from_dockw(self):
        """
        getter/setting for receiving feedback from in-document keyword
        """
        redis_kw = Keyword.get("redis")

        # (document id, feedback value), recorded in this order
        for doc_id, value in ((2, .5), (1, 1)):
            redis_kw.rec_fb_from_dockw(redis_kw, Document.get(doc_id),
                                       value, self.session)

        recorded = redis_kw.fb_from_doc(self.session)
        self.assertEqual(recorded,
                         {Document.get(1): 1, Document.get(2): .5})

        # is not the right keyword
        self.assertRaises(AssertionError,
                          redis_kw.rec_fb_from_dockw,
                          Keyword.get("the"), Document.get(1), 1,
                          self.session)
Exemplo n.º 29
0
    def test_modellist2modellist_similarity(self):
        """
        List-to-list similarity: [redis, a] against [database, the] and
        documents [3, 5] against documents [4, 6].
        """
        # for keywords
        kws_a = Keyword.get_many(["redis", "a"])
        kws_b = Keyword.get_many(["database", "the"])
        expected_kw_similarity = 0.42205423035497763
        self.assertAlmostEqual(expected_kw_similarity,
                               kws_a.similarity_to(kws_b))

        # for documents
        docs_a = Document.get_many([3, 5])
        docs_b = Document.get_many([4, 6])
        expected_doc_similarity = 0.6990609119502719
        self.assertAlmostEqual(expected_doc_similarity,
                               docs_a.similarity_to(docs_b))
Exemplo n.º 30
0
    def test_equality_same_type(self):
        """
        Lists of the same type are equal when they hold the same members
        in any order, and unequal when the members differ.
        """
        # keyword lists
        kws = Keyword.get_many(["redis", "a", "the"])
        kws_reordered = Keyword.get_many(["a", "the", "redis"])
        kws_other = Keyword.get_many(["a", "the", "python"])

        self.assertEqual(kws, kws_reordered)
        self.assertNotEqual(kws_other, kws_reordered)

        # document lists
        docs = Document.get_many([1, 2, 3])
        docs_reordered = Document.get_many([2, 3, 1])
        docs_other = Document.get_many([4, 5, 6])

        self.assertEqual(docs, docs_reordered)
        self.assertNotEqual(docs_other, docs_reordered)
Exemplo n.º 31
0
    def test_equality_same_type(self):
        """
        Equality between two keyword lists and between two document lists:
        member order is irrelevant, differing members make them unequal.
        """
        cases = (
            (Keyword,
             ["redis", "a", "the"], ["a", "the", "redis"],
             ["a", "the", "python"]),
            (Document, [1, 2, 3], [2, 3, 1], [4, 5, 6]),
        )
        for model, ids, reordered_ids, other_ids in cases:
            base = model.get_many(ids)
            same_members = model.get_many(reordered_ids)
            other_members = model.get_many(other_ids)

            self.assertEqual(base, same_members)
            self.assertNotEqual(other_members, same_members)
Exemplo n.º 32
0
    def test_rec_fb_from_kw(self):
        """
        getter/setting for receiving feedback from keyword
        """
        redis_kw = Keyword.get("redis")

        # the value read back is the one recorded last
        for value in (1, .5):
            redis_kw.rec_fb_from_kw(redis_kw, value, self.session)
            self.assertEqual(value, redis_kw.fb_from_kw(self.session))

        # is not the right keyword
        wrong_kw = Keyword.get("the")
        self.assertRaises(AssertionError, redis_kw.rec_fb_from_kw,
                          wrong_kw, 1, self.session)
Exemplo n.º 33
0
    def test_rec_fb_from_kw(self):
        """
        getter/setting for receiving feedback from keyword
        """
        session = self.session
        target = Keyword.get("redis")

        target.rec_fb_from_kw(target, 1, session)
        first_reading = target.fb_from_kw(session)
        self.assertEqual(1, first_reading)

        target.rec_fb_from_kw(target, .5, session)
        second_reading = target.fb_from_kw(session)
        self.assertEqual(.5, second_reading)

        # is not the right keyword
        self.assertRaises(AssertionError,
                          target.rec_fb_from_kw,
                          Keyword.get("the"), 1, session)
Exemplo n.º 34
0
    def test_keyword_centroid(self):
        """
        Centroid of a KeywordList: for one keyword it matches the keyword's
        own vector, for two keywords it matches the average of the vectors.
        """
        lone_kw = Keyword.get("a")
        lone_list = KeywordList([lone_kw])

        self.assertArrayAlmostEqual(matrix2array(lone_list.centroid),
                                    lone_kw.vec.toarray()[0])

        first_kw = Keyword.get("a")
        second_kw = Keyword.get("the")
        both = Keyword.get_many(["a", "the"])

        centroid = matrix2array(both.centroid)
        vec_sum = first_kw.vec.toarray()[0] + second_kw.vec.toarray()[0]
        self.assertArrayAlmostEqual(centroid, vec_sum / 2)
Exemplo n.º 35
0
    def test_recommend(self):
        """
        recommend() for the prepared session must return the expected
        documents and keywords.
        """
        docs, kws = self.r.recommend(self.session,
                                     4, 4,
                                     1, .5,
                                     1., .5)

        expected_docs = Document.get_many([1, 8, 2, 6])
        self.assertEqual(expected_docs, docs)

        expected_kws = Keyword.get_many(
            ["redis", "database", "the", "mysql", "a", "python"])
        self.assertEqual(expected_kws, kws)
Exemplo n.º 36
0
 def test_recommend_keywords(self):
     """
     recommend_keywords on self.fmim, called with (8, 1, 0.5), must return
     the expected keywords as a plain list.
     """
     recommended = self.r.recommend_keywords(self.fmim, self.session,
                                             8, 1, 0.5)

     expected_ids = ["redis", "database", "python", "mysql", "tornado",
                     "web"]
     expected = list(Keyword.get_many(expected_ids))

     self.assertEqual(expected, recommended)
Exemplo n.º 37
0
    def setUp(self):
        """
        Create the GoalBasedEvaluator and set its goal to documents [1, 2]
        and keywords redis + database.
        """
        goal_docs = Document.get_many([1, 2])
        goal_kws = Keyword.get_many(["redis", "database"])

        evaluator = GoalBasedEvaluator()
        self.e = evaluator
        evaluator.setGoal(goal_docs, goal_kws)
Exemplo n.º 38
0
    def setUp(self):
        """
        Build the CmdApp with its two recommenders, a session with
        recommended documents/keywords, the feedback payload self.fb, and
        seed the random module.
        """
        query_recommender = QueryBasedRecommender(3, 2, 3, 2,
                                                  **fmim.__dict__)
        linrel_recommender = LinRelRecommender(3, 3,
                                               1., .5,
                                               1., .5,
                                               None, None,
                                               None, None,
                                               **fmim.__dict__)

        self.app = CmdApp(OnePassPropagator, OverrideUpdater,
                          query_recommender, linrel_recommender)

        self.session = get_session()

        # add recommended list
        recommended_docs = Document.get_many([1, 2, 3])
        self.session.add_doc_recom_list(recommended_docs)
        recommended_kws = Keyword.get_many(["a", "redis", "database"])
        self.session.add_kw_recom_list(recommended_kws)

        # one feedback entry of each kind
        self.fb = dict(docs=[[1, .5]],
                       kws=[["redis", .5]],
                       dockws=[["redis", 1, .5]])

        random.seed(123456)
Exemplo n.º 39
0
    def test_loop_done(self):
        """
        test if things are cleaned when the loop is done
        """
        session = self.session
        target = Document.get(1)

        # record feedback from an in-document keyword, a keyword and the
        # document itself
        target.rec_fb_from_dockw(Keyword.get("redis"), target, 1, session)
        target.rec_fb_from_kw(Keyword.get("database"), .5, session)
        target.rec_fb_from_doc(target, .5, session)

        # terminate the loop; the weighted sum must be 0 afterwards
        target.loop_done(session)

        self.assertEqual(target.fb_weighted_sum(session), 0)
Exemplo n.º 40
0
    def test_loop_done(self):
        """
        test if things are cleaned when the loop is done
        """
        first_doc = Document.get(1)
        redis_kw = Keyword.get("redis")
        first_doc.rec_fb_from_dockw(redis_kw, first_doc, 1, self.session)

        database_kw = Keyword.get("database")
        first_doc.rec_fb_from_kw(database_kw, .5, self.session)

        first_doc.rec_fb_from_doc(first_doc, .5, self.session)

        # terminate the loop
        first_doc.loop_done(self.session)

        # after loop_done the weighted feedback sum is expected to be 0
        remaining = first_doc.fb_weighted_sum(self.session)
        self.assertEqual(remaining, 0)
Exemplo n.º 41
0
    def setUp(self):
        """
        Prepare self.e, a GoalBasedEvaluator whose goal is documents
        [1, 2] together with the keywords "redis" and "database".
        """
        target_docs = Document.get_many([1, 2])
        target_kws = Keyword.get_many(["redis", "database"])

        self.e = GoalBasedEvaluator()
        self.e.setGoal(target_docs, target_kws)
Exemplo n.º 42
0
    def test_recommend_main(self):
        """
        After the feedback in self.fb has been received, recommend() with
        start=False must return the expected documents and keywords.
        """
        # receive the feedback first
        self.app.receive_feedbacks(self.session, self.fb)

        docs, kws = self.app.recommend(start=False, session=self.session)

        expected_docs = Document.get_many([1, 2, 6])
        self.assertEqual(expected_docs, docs)

        expected_kws = Keyword.get_many(
            ["redis", "database", "a", "python", "the"])
        self.assertEqual(expected_kws, kws)
Exemplo n.º 43
0
 def test_update_kw_fb(self):
     """
     update keyword feedback: the value is visible both in
     session.kw_feedbacks and through kw.fb(session)
     """
     redis_kw = Keyword.get('redis')
     self.session.update_kw_feedback(redis_kw, 1)

     stored = self.session.kw_feedbacks
     self.assertEqual(stored, {redis_kw: 1})

     read_back = redis_kw.fb(self.session)
     self.assertEqual(read_back, 1)
Exemplo n.º 44
0
    def test_rec_fb_from_dockw(self):
        """
        getter/setting for receiving feedback from in-document keyword
        """
        session = self.session
        target = Keyword.get("redis")

        target.rec_fb_from_dockw(target, Document.get(2), .5, session)
        target.rec_fb_from_dockw(target, Document.get(1), 1, session)

        per_document = target.fb_from_doc(session)
        expected = {Document.get(1): 1, Document.get(2): .5}
        self.assertEqual(per_document, expected)

        # is not the right keyword
        other_kw = Keyword.get("the")
        self.assertRaises(AssertionError, target.rec_fb_from_dockw,
                          other_kw, Document.get(1), 1, session)
Exemplo n.º 45
0
    def test_one(self):
        """
        Every round contains documents 1 and 2 and the keywords redis and
        database; both returned sequences are expected to be all ones.
        """
        docs = []
        for ids in ([1, 2], [1, 2], [2, 1]):
            docs.append(Document.get_many(ids))

        kws = []
        for _ in range(3):
            kws.append(Keyword.get_many(["redis", "database"]))

        scores = self.e.evaluate(docs, kws)
        expected = ([1, 1, 1], [1, 1, 1])

        for position in (0, 1):
            self.assertArrayAlmostEqual(expected[position],
                                        scores[position])
Exemplo n.º 46
0
 def test_sample_documents_associated_with_keywords_sample_size_too_large(
         self):
     """
     in case the sample size is too large: asking for 999 documents for
     "python" is expected to give documents 3, 4, 5, 6 and 8
     """
     python_only = Keyword.get_many(["python"])
     sampled = self.r.sample_documents_associated_with_keywords(python_only,
                                                                999)

     expected = Document.get_many([3, 4, 5, 6, 8])
     self.assertEqual(expected, sampled)
Exemplo n.º 47
0
 def test_recommend_keywords(self):
     """
     Five keywords are expected: the first three flagged as recommended
     (starting with "python" and "redis"), the remaining ones not.
     """
     docs = Document.get_many([6, 1])
     query = Keyword.get_many(["python", "redis", "non-existing"])
     recommended = self.r.recommend_keywords(docs, 5, 3,
                                             query_keywords=query)

     head = recommended[:3]
     tail = recommended[3:]

     self.assertEqual(5, len(recommended))
     # the first two should be python and redis
     self.assertEqual(list(Keyword.get_many(["python", "redis"])), head[:2])

     for flagged_kw in head:
         self.assertTrue(flagged_kw["recommended"])
     for unflagged_kw in tail:
         self.assertFalse(unflagged_kw["recommended"])

     # no easy way to further test the elements of the kws
Exemplo n.º 48
0
 def test_sample_documents_associated_with_keywords_not_existing_keywords_case(
         self):
     """
     in case keywords are non-existent in the corpus: no documents are
     expected back
     """
     unknown_kws = Keyword.get_many(["foo", "bar", "baz"])
     sampled = self.r.sample_documents_associated_with_keywords(unknown_kws,
                                                                999)

     self.assertEqual(0, len(sampled))
Exemplo n.º 49
0
    def test_recommend(self):
        """
        Run recommend() with the fixture session and compare the returned
        documents and keywords with the expected ones.
        """
        arguments = (4, 4, 1, .5, 1., .5)
        docs, kws = self.r.recommend(self.session, *arguments)

        self.assertEqual(Document.get_many([1, 8, 2, 6]), docs)

        expected_kw_ids = ["redis", "database", "the", "mysql", "a",
                           "python"]
        self.assertEqual(Keyword.get_many(expected_kw_ids), kws)
Exemplo n.º 50
0
    def setUp(self):
        """
        Prepare self.r (a LinRelRecommender) and self.session, which is
        pre-loaded with feedback on "redis", "database" and on documents
        1, 2 and 8.
        """
        recommender = LinRelRecommender(2, 2, 1., .1, 1., .1, None, None,
                                        **fmim.__dict__)
        self.r = recommender

        session = get_session()
        self.session = session

        # giving the feedbacks
        session.update_kw_feedback(Keyword.get("redis"), .7)
        session.update_kw_feedback(Keyword.get("database"), .6)

        for doc_id in [1, 2, 8]:
            session.update_doc_feedback(Document.get(doc_id), .7)
Exemplo n.º 51
0
 def test_fb_from_dockw(self):
     """
     Propagate a feedback of .5 given to "redis" inside document 1, run
     the updater, then check the feedback of the document and keyword.
     """
     redis_kw = Keyword.get("redis")
     first_doc = Document.get(1)

     ppgt.fb_from_dockw(redis_kw, first_doc, .5, self.session)
     upd.update(self.session)

     doc_feedback = first_doc.fb(self.session)
     self.assertAlmostEqual(0.183701573217, doc_feedback)

     kw_feedback = redis_kw.fb(self.session)
     self.assertAlmostEqual(1/4., kw_feedback)
Exemplo n.º 52
0
 def test_recommend_main(self):
     """
     Feed self.fb to the app, then check what recommend(start=False)
     returns for the same session.
     """
     session = self.session

     # receive the feedback first
     self.app.receive_feedbacks(session, self.fb)

     docs, kws = self.app.recommend(start=False, session=session)

     self.assertEqual(Document.get_many([1, 2, 6]), docs)

     expected_kw_ids = ["redis", "database", "a", "python", "the"]
     self.assertEqual(Keyword.get_many(expected_kw_ids), kws)
Exemplo n.º 53
0
 def test_recommend_documents_sensible_query(self):
     """
     query that has keywords existing in the documents' keyword list:
     both the matched documents and the query keywords are checked
     """
     raw_query = "database, python, redis"
     matched_docs, query_keywords = self.r.recommend_documents(raw_query, 4)

     expected_docs = Document.get_many([6, 1, 2, 5])
     self.assertEqual(expected_docs, matched_docs)

     expected_kws = Keyword.get_many(["database", "python", "redis"])
     self.assertEqual(expected_kws, query_keywords)
Exemplo n.º 54
0
    def test_rec_fb_from_kw(self):
        """
        getter/setting for receiving feedback from keyword

        Records feedback for document 1 from "redis" and "database",
        checks that "python" is rejected, and checks the weighted sum.
        """
        doc = Document.get(1)

        doc.rec_fb_from_kw(Keyword.get("redis"), 1, self.session)
        doc.rec_fb_from_kw(Keyword.get("database"), .5, self.session)

        self.assertEqual(doc.fb_from_kw(self.session),
                         {Keyword.get("redis"): 1,
                          Keyword.get("database"): .5})

        # feedback from "python" must raise AssertionError
        # (presumably because document 1 does not contain that keyword)
        self.assertRaises(AssertionError, doc.rec_fb_from_kw,
                          Keyword.get("python"), 1, self.session)

        # test the weighted sum
        # NOTE(review): the third weight only appears in the denominator -
        # presumably it belongs to a feedback source whose value is 0;
        # confirm against fb_weighted_sum.
        weights = [0.62981539329519109, 0.45460437826405437,
                   0.62981539329519109]
        expected = (weights[0] * 1 + weights[1] * .5) / sum(weights)

        # the result is a computed float, so compare with a tolerance
        # rather than exactly
        self.assertAlmostEqual(expected, doc.fb_weighted_sum(self.session))
Exemplo n.º 55
0
    def test_two(self):
        """
        Three rounds with different documents and keywords; both returned
        score sequences are compared with precomputed numbers.
        """
        docs = [Document.get_many([8, 10]),
                Document.get_many([3, 4]),
                Document.get_many([2, 1])]
        kws = [Keyword.get_many(["a", "the"]),
               Keyword.get_many(["python", "database"]),
               Keyword.get_many(["database", "redis"])]

        scores = self.e.evaluate(docs, kws)

        expected = (
            [0.34491169135422844, 0.1726882003112921, 1.0],
            [0.4834283906452939, 0.759679156743632, 0.9999999999999999],
        )
        for position in (0, 1):
            self.assertArrayAlmostEqual(expected[position],
                                        scores[position])
Exemplo n.º 56
0
 def test_recommend(self):
     """
     recommend() called with explicit keyword and document filters must
     return the expected documents and keywords.
     """
     docs, kws = self.r.recommend(
         self.session,
         4, 4,
         1, .5,
         1., .5,
         kw_filters=[self.my_kw_filter],
         doc_filters=[self.kw_count_filter, self.has_database_filter])

     # diagnostic output only; written as print(...) so that it gives the
     # same output on Python 2 and is also valid on Python 3
     print(self.fmim.doc2kw_m.shape)

     self.assertEqual(Document.get_many([2, 1, 6, 7]), docs)
     self.assertEqual(
         Keyword.get_many(
             ["redis", "database", "python", "mysql", "a", "the"]),
         kws)