Exemplo n.º 1
0
    def test_binarizer(self):
        # Exercises Binarizer's param bookkeeping: the declared param list,
        # defaults, explicit setters, copy() with overrides, and construction
        # from keyword arguments.
        b0 = Binarizer()
        self.assertListEqual(b0.params, [
            b0.inputCol, b0.inputCols, b0.outputCol, b0.outputCols,
            b0.threshold, b0.thresholds
        ])
        # A freshly constructed instance must have no explicitly set params.
        # Logical `not` is required here: bitwise `~` on a bool evaluates to
        # -1 or -2, both truthy, which made this assertion pass unconditionally.
        self.assertTrue(all(not b0.isSet(p) for p in b0.params))
        self.assertTrue(b0.hasDefault(b0.threshold))
        self.assertEqual(b0.getThreshold(), 0.0)
        b0.setParams(inputCol="input", outputCol="output").setThreshold(1.0)
        # Only inputCol, outputCol and threshold were set above, so at least
        # one of the declared params must still report as unset.
        self.assertFalse(all(b0.isSet(p) for p in b0.params))
        self.assertEqual(b0.getThreshold(), 1.0)
        self.assertEqual(b0.getInputCol(), "input")
        self.assertEqual(b0.getOutputCol(), "output")

        # copy() with an extra param map keeps the uid and param list while
        # applying the override to the copy.
        b0c = b0.copy({b0.threshold: 2.0})
        self.assertEqual(b0c.uid, b0.uid)
        self.assertListEqual(b0c.params, b0.params)
        self.assertEqual(b0c.getThreshold(), 2.0)

        # A separately constructed instance gets its own uid and honours the
        # keyword arguments passed to the constructor.
        b1 = Binarizer(threshold=2.0, inputCol="input", outputCol="output")
        self.assertNotEqual(b1.uid, b0.uid)
        self.assertEqual(b1.getThreshold(), 2.0)
        self.assertEqual(b1.getInputCol(), "input")
        self.assertEqual(b1.getOutputCol(), "output")
Exemplo n.º 2
0
 def test_preserve_set_state(self):
     # Checks that running transform() does not flip a param that was never
     # explicitly set into the "set" state.
     frame = self.spark.createDataFrame([(0.5,)], ["data"])
     transformer = Binarizer(inputCol="data")
     param_name = "threshold"
     self.assertFalse(transformer.isSet(param_name))
     transformer.transform(frame)
     # NOTE(review): presumably re-syncs the Python wrapper's params from its
     # JVM counterpart - confirm against the wrapper implementation.
     transformer._transfer_params_from_java()
     set_after_transform = transformer.isSet(param_name)
     self.assertFalse(
         set_after_transform,
         "Params not explicitly set should remain unset after transform")
Exemplo n.º 3
0
 def test_preserve_set_state(self):
     # "threshold" is never set explicitly in this test; it must still report
     # as unset once the transformer has been applied to a DataFrame.
     rows = [(0.5,)]
     columns = ["data"]
     df = self.spark.createDataFrame(rows, columns)
     bin_stage = Binarizer(inputCol="data")
     self.assertFalse(bin_stage.isSet("threshold"))
     bin_stage.transform(df)
     # NOTE(review): looks like this pulls param state back from the Java
     # object into the Python wrapper - confirm.
     bin_stage._transfer_params_from_java()
     failure_msg = "Params not explicitly set should remain unset after transform"
     self.assertFalse(bin_stage.isSet("threshold"), failure_msg)
Exemplo n.º 4
0
 def test_default_params_transferred(self):
     # Overrides the Python-side default for outputCol without setting the
     # param, then checks that transform() produces a column under that
     # default name while outputCol still reports as unset.
     frame = self.spark.createDataFrame([(0.5, )], ["data"])
     transformer = Binarizer(inputCol="data")
     # intentionally change the pyspark default, but don't set it
     output_param = transformer.outputCol
     transformer._defaultParamMap[output_param] = "my_default"
     transformed = transformer.transform(frame)
     rows = transformed.select("my_default").collect()
     self.assertFalse(transformer.isSet(transformer.outputCol))
     # The single input value 0.5 is expected to binarize to 1.0.
     first_value = rows[0][0]
     self.assertEqual(first_value, 1.0)
Exemplo n.º 5
0
 def test_default_params_transferred(self):
     # A default changed only in the Python wrapper (never explicitly set)
     # should still take effect in transform(): the result is selected by
     # the overridden default column name.
     default_name = "my_default"
     df = self.spark.createDataFrame([(0.5,)], ["data"])
     bin_stage = Binarizer(inputCol="data")
     # intentionally change the pyspark default, but don't set it
     bin_stage._defaultParamMap[bin_stage.outputCol] = default_name
     collected = bin_stage.transform(df).select(default_name).collect()
     # Changing the default must not mark the param as explicitly set.
     self.assertFalse(bin_stage.isSet(bin_stage.outputCol))
     first_row = collected[0]
     self.assertEqual(first_row[0], 1.0)
Exemplo n.º 6
0
    def test_binarizer(self):
        # Exercises Binarizer's param bookkeeping: the declared param list,
        # defaults, explicit setters, copy() with overrides, and construction
        # from keyword arguments.
        b0 = Binarizer()
        self.assertListEqual(b0.params, [b0.inputCol, b0.outputCol, b0.threshold])
        # A freshly constructed instance must have no explicitly set params.
        # Logical `not` is required here: bitwise `~` on a bool evaluates to
        # -1 or -2, both truthy, which made this assertion pass unconditionally.
        self.assertTrue(all(not b0.isSet(p) for p in b0.params))
        self.assertTrue(b0.hasDefault(b0.threshold))
        self.assertEqual(b0.getThreshold(), 0.0)
        b0.setParams(inputCol="input", outputCol="output").setThreshold(1.0)
        # All three declared params have now been set explicitly.
        self.assertTrue(all(b0.isSet(p) for p in b0.params))
        self.assertEqual(b0.getThreshold(), 1.0)
        self.assertEqual(b0.getInputCol(), "input")
        self.assertEqual(b0.getOutputCol(), "output")

        # copy() with an extra param map keeps the uid and param list while
        # applying the override to the copy.
        b0c = b0.copy({b0.threshold: 2.0})
        self.assertEqual(b0c.uid, b0.uid)
        self.assertListEqual(b0c.params, b0.params)
        self.assertEqual(b0c.getThreshold(), 2.0)

        # A separately constructed instance gets its own uid and honours the
        # keyword arguments passed to the constructor.
        b1 = Binarizer(threshold=2.0, inputCol="input", outputCol="output")
        self.assertNotEqual(b1.uid, b0.uid)
        self.assertEqual(b1.getThreshold(), 2.0)
        self.assertEqual(b1.getInputCol(), "input")
        self.assertEqual(b1.getOutputCol(), "output")