示例#1
0
    def test_feature_union_multi(self):
        """Check that a three-way feature union compiles into one flat union.

        Four single-row mock transformers are built for (q1, doc1) with
        scores 0, 5, 10 and 15. The last three are combined with ``**``,
        the combination is compiled, and the compiled pipeline is run after
        the first mock. The single resulting row must carry a ``features``
        entry equal to ``[5, 10, 15]``.
        """
        import numpy as np

        def _uniform(score):
            # One-row mock result for (q1, doc1) carrying the given score.
            return ptt.UniformTransformer(
                pd.DataFrame([["q1", "doc1", score]],
                             columns=["qid", "docno", "score"]))

        mock0 = _uniform(0)
        mock1 = _uniform(5)
        mock2 = _uniform(10)
        mock3 = _uniform(15)

        mock12a = mock1 ** mock2
        # ``**`` is right-associative: this is mock1 ** (mock2 ** mock3).
        mock123a = mock1 ** mock2 ** mock3

        # Before compilation a union built by ``**`` has two members.
        self.assertEqual(2, len(mock12a.models))
        ptt.setup_rewrites()

        # After compilation the nested unions are expected to be one
        # three-member union.
        mock123_simple = mock123a.compile()
        self.assertIsNotNone(mock123_simple)
        self.assertEqual(
            "FeatureUnionPipeline(UniformTransformer(), UniformTransformer(), UniformTransformer())",
            repr(mock123_simple))
        self.assertEqual(3, len(mock123_simple.models))

        # NOTE(review): only the compiled form is executed; the original
        # left the uncompiled variants (mock123a and mock12a ** mock3)
        # commented out -- confirm whether they should be covered as well.
        # No input frame is passed: the test relies on the mocks producing
        # their fixed frames when given None.
        rtr = (mock0 >> mock123_simple).transform(None)
        self.assertIsNotNone(rtr)
        self.assertEqual(1, len(rtr))
        for column in ("qid", "docno", "score", "features"):
            self.assertIn(column, rtr.columns)
        self.assertIn("q1", rtr["qid"].values)
        self.assertIn("doc1", rtr["docno"].values)
        # Feature order follows the order of the union's members.
        self.assertTrue(
            np.array_equal(np.array([5, 10, 15]), rtr.iloc[0]["features"]))
示例#2
0
    def test_feature_union_multi(self):
        """Check multi-way feature unions, including empty and partial members.

        Mock transformers built with ``pt.Transformer.from_df(..., uniform=True)``
        return fixed results for q1 over doc1/doc2. The test verifies that:

        * a three-way union compiles into a single three-member union;
        * the compiled union, both ``**`` spellings and both hand-built
          nestings all yield ``features == [5, 10, 15]`` for the first row;
        * a union containing a member with no results raises ``ValueError``;
        * a union containing a member with results for only one of the two
          documents still passes, while a warning containing
          ``"Got number of results"`` is recorded.
        """
        import numpy as np
        import pyterrier.transformer as ptt

        def _uniform(rows):
            # Mock transformer backed by a fixed (qid, docno, score) frame.
            return pt.Transformer.from_df(
                pd.DataFrame(rows, columns=["qid", "docno", "score"]),
                uniform=True)

        mock0 = _uniform([["q1", "doc1", 0], ["q1", "doc2", 0]])
        mock1 = _uniform([["q1", "doc1", 5], ["q1", "doc2", 0]])
        mock2 = _uniform([["q1", "doc1", 10], ["q1", "doc2", 0]])
        mock3 = _uniform([["q1", "doc1", 15], ["q1", "doc2", 0]])

        # Degenerate members: no rows at all, or a row for doc1 only.
        mock3_empty = _uniform([])
        mock2_partial = _uniform([["q1", "doc1", 10]])
        mock3_partial = _uniform([["q1", "doc1", 15]])

        mock12a = mock1 ** mock2
        # ``**`` is right-associative: this is mock1 ** (mock2 ** mock3).
        mock123a = mock1 ** mock2 ** mock3
        mock123b = mock12a ** mock3
        # The same two nestings, built without the operator.
        mock123a_manual = ptt.FeatureUnionPipeline(
            ptt.FeatureUnionPipeline(mock1, mock2), mock3)
        mock123b_manual = ptt.FeatureUnionPipeline(
            mock1, ptt.FeatureUnionPipeline(mock2, mock3))

        # Nested unions in which one inner member returns nothing.
        mock123e = ptt.FeatureUnionPipeline(
            mock1, ptt.FeatureUnionPipeline(mock2, mock3_empty))
        mock12e3 = ptt.FeatureUnionPipeline(
            mock1, ptt.FeatureUnionPipeline(mock3_empty, mock3))

        # Nested unions in which one inner member covers doc1 only.
        mock123p = ptt.FeatureUnionPipeline(
            mock1, ptt.FeatureUnionPipeline(mock2, mock3_partial))
        mock12p3 = ptt.FeatureUnionPipeline(
            mock1, ptt.FeatureUnionPipeline(mock2_partial, mock3))

        # Before compilation a union built by ``**`` has two members.
        self.assertEqual(2, len(mock12a.models))
        ptt.setup_rewrites()

        mock123_simple = mock123a.compile()
        self.assertIsNotNone(mock123_simple)
        self.assertEqual(
            "FeatureUnionPipeline(UniformTransformer(), UniformTransformer(), UniformTransformer())",
            repr(mock123_simple))
        self.assertEqual(3, len(mock123_simple.models))

        def _test_expression(expression):
            # No input frame is passed: the test relies on the mocks
            # producing their fixed frames when given None.
            rtr = (mock0 >> expression).transform(None)
            self.assertIsNotNone(rtr)
            self.assertEqual(2, len(rtr))
            self.assertIn("qid", rtr.columns)
            self.assertIn("docno", rtr.columns)
            # Suffixed columns would mean an intermediate merge leaked
            # into the output instead of a single "features" column.
            self.assertNotIn("features_x", rtr.columns)
            self.assertNotIn("features_y", rtr.columns)
            self.assertIn("features", rtr.columns)
            self.assertIn("q1", rtr["qid"].values)
            self.assertIn("doc1", rtr["docno"].values)
            self.assertTrue(
                np.allclose(np.array([5, 10, 15]), rtr.iloc[0]["features"]))

        # Every spelling of the complete three-way union must agree. The
        # hand-built nestings replace a duplicated mock123b call and were
        # previously constructed but never exercised.
        for complete in (mock123_simple, mock123a, mock123b,
                         mock123a_manual, mock123b_manual):
            _test_expression(complete)

        for with_empty in (mock123e, mock12e3):
            with self.assertRaises(ValueError):
                _test_expression(with_empty)

        for with_partial in (mock123p, mock12p3):
            with warnings.catch_warnings(record=True) as caught:
                # Record every warning, even one already raised from the
                # same location earlier in the run.
                warnings.simplefilter("always")
                _test_expression(with_partial)
            # Search all recorded warnings: the expected one need not be
            # the last, and an empty list must fail, not raise IndexError.
            self.assertTrue(
                any("Got number of results" in str(item.message)
                    for item in caught),
                'no warning containing "Got number of results" was recorded')