Пример #1
0
    def test_scoring_manual(self):
        """Score one hand-built document with TextScorer, no background index.

        The query term "fox" appears twice in the document body, so both
        weighting models exercised here are expected to yield a score of 2
        for the single returned row.
        """
        from pyterrier.batchretrieve import TextScorer

        # Named `docs` rather than `input` to avoid shadowing the builtin.
        docs = pd.DataFrame(
            [["q1", "fox", "d1", "all the fox were fox"]],
            columns=["qid", "query", "docno", "body"])

        scorer = TextScorer(wmodel="Tf")
        rtr = scorer(docs)
        self.assertEqual(1, len(rtr))
        # assertIn reports the actual columns on failure, unlike assertTrue.
        self.assertIn("score", rtr.columns)
        self.assertEqual(2, rtr.iloc[0]["score"])

        scorer = TextScorer(wmodel="org.terrier.python.TestModel$TFOverN")
        rtr = scorer(docs)
        self.assertEqual(1, len(rtr))
        self.assertIn("score", rtr.columns)
        # tf / numdocs -- presumably numdocs is 1 here because, without a
        # background index, only the single input document is counted.
        self.assertEqual(2, rtr.iloc[0]["score"])
Пример #2
0
 def test_scoring_manual_empty(self):
     """Score a document whose body is empty.

     The row must still be returned, with a "score" column, and its score
     is expected to be 0 since the query term cannot occur in the body.
     """
     from pyterrier.batchretrieve import TextScorer

     # Named `docs` rather than `input` to avoid shadowing the builtin.
     docs = pd.DataFrame(
         [["q1", "fox", "d1", ""]],
         columns=["qid", "query", "docno", "body"])

     scorer = TextScorer(wmodel="Tf")
     rtr = scorer(docs)
     self.assertEqual(1, len(rtr))
     # assertIn reports the actual columns on failure, unlike assertTrue.
     self.assertIn("score", rtr.columns)
     self.assertEqual(0, rtr.iloc[0]["score"])
Пример #3
0
    def test_scoring_manual_background(self):
        """Score one hand-built document using a background index.

        The "vaswani" dataset index is supplied as ``background_index`` in
        two forms: first as the value returned by ``get_index()``, then as
        an index object opened with ``pt.IndexFactory.of``. With the "Tf"
        model the expected score is still 2; with the TFOverN test model it
        is expected to be 2 divided by the background index's document count.
        """
        from pyterrier.batchretrieve import TextScorer

        # Named `docs` rather than `input` to avoid shadowing the builtin.
        docs = pd.DataFrame(
            [["q1", "fox", "d1", "all the fox were fox"]],
            columns=["qid", "query", "docno", "body"])

        # Case 1: background index passed as returned by the dataset.
        scorer = TextScorer(
            wmodel="Tf",
            background_index=pt.get_dataset("vaswani").get_index())
        rtr = scorer(docs)
        self.assertEqual(1, len(rtr))
        # assertIn reports the actual columns on failure, unlike assertTrue.
        self.assertIn("score", rtr.columns)
        self.assertEqual(2, rtr.iloc[0]["score"])

        # Case 2: background index passed as an already-opened index object.
        index_background = pt.IndexFactory.of(
            pt.get_dataset("vaswani").get_index())
        scorer = TextScorer(wmodel="org.terrier.python.TestModel$TFOverN",
                            background_index=index_background)
        rtr = scorer(docs)
        self.assertEqual(1, len(rtr))
        self.assertIn("score", rtr.columns)

        # tf / numdocs, where numdocs now comes from the background index.
        num_docs = (
            index_background.getCollectionStatistics().getNumberOfDocuments())
        self.assertEqual(2 / num_docs, rtr.iloc[0]["score"])