def check_params(test_self, py_stage, check_params_exist=True):
    """
    Compare the params of a Python stage with those of its Java counterpart.

    The function returns silently when ``py_stage`` has no ``_to_java``
    attribute or when ``_to_java()`` yields ``None``. Otherwise it asserts,
    through ``test_self.assertEqual``, that:

    - the Python and Java objects report the same UID;
    - when ``check_params_exist`` is true, the Python param names equal the
      sorted Java param names;
    - every Python param's ``parent`` equals the stage UID;
    - Python and Java agree on whether each param has a default;
    - for params with a default (except ``seed``), the Java default converted
      with ``_java2py`` equals the Python default.

    Both stages have each compared param cleared as a side effect.
    """
    py_stage_str = "%s %s" % (type(py_stage), py_stage)

    # Nothing to compare against when there is no Java side.
    if not hasattr(py_stage, "_to_java"):
        return
    java_stage = py_stage._to_java()
    if java_stage is None:
        return

    test_self.assertEqual(py_stage.uid, java_stage.uid(), msg=py_stage_str)

    if check_params_exist:
        python_names = [param.name for param in py_stage.params]
        java_names = [java_param.name() for java_param in list(java_stage.params())]
        listing_msg = (
            "Param list in Python does not match Java for %s:\nJava = %s\nPython = %s"
            % (py_stage_str, java_names, python_names)
        )
        test_self.assertEqual(python_names, sorted(java_names), listing_msg)

    for param in py_stage.params:
        test_self.assertEqual(param.parent, py_stage.uid)

        java_param = java_stage.getParam(param.name)
        has_py_default = py_stage.hasDefault(param)
        has_java_default = java_stage.hasDefault(java_param)
        test_self.assertEqual(
            has_py_default,
            has_java_default,
            "Default value mismatch of param %s for Params %s" % (param.name, str(py_stage)),
        )

        # Random seeds between Spark and PySpark are different, so "seed"
        # defaults are never compared.
        if not has_py_default or param.name == "seed":
            continue

        java_value = _java2py(
            test_self.sc,
            java_stage.clear(java_param).getOrDefault(java_param),
        )
        py_stage.clear(param)
        py_value = py_stage.getOrDefault(param)

        # NaN never compares equal to itself, so both sides are replaced by
        # string markers before the equality assertion.
        if isinstance(java_value, float) and np.isnan(java_value):
            java_value = "NaN"
            if np.isnan(py_value):
                py_value = "NaN"
            else:
                py_value = "not NaN"

        test_self.assertEqual(
            java_value,
            py_value,
            "Java default %s != python default %s of param %s for Params %s"
            % (str(java_value), str(py_value), param.name, str(py_stage)),
        )
def test_new_java_array(self):
    # Round-trips Python lists through JavaWrapper._new_java_array and
    # _java2py, asserting that the converted result equals the expected
    # Python list for several element classes.
    jvm = self.sc._gateway.jvm

    def round_trip(values, element_class):
        # Build the Java array, then convert it back to Python.
        array = JavaWrapper._new_java_array(values, element_class)
        return _java2py(self.sc, array)

    # Arrays of strings, integers, floats and bools, each paired with the
    # java.lang class name used as the element class.
    boxed_cases = (
        ("String", ["a", "b", "c"]),
        ("Integer", [1, 2, 3]),
        ("Double", [0.1, 0.2, 0.3]),
        ("Boolean", [False, True, True]),
    )
    for class_name, expected in boxed_cases:
        element_class = getattr(jvm.java.lang, class_name)
        self.assertEqual(round_trip(expected, element_class), expected)

    # Array of Java DenseVectors: the elements are converted to Java first,
    # and the result is compared with the original Python vectors.
    first_vec = DenseVector([0.0, 1.0])
    second_vec = DenseVector([1.0, 0.0])
    java_vectors = [_py2java(self.sc, vec) for vec in (first_vec, second_vec)]
    vector_class = jvm.org.apache.spark.ml.linalg.DenseVector
    self.assertEqual(round_trip(java_vectors, vector_class), [first_vec, second_vec])

    # Empty array.
    self.assertEqual(round_trip([], jvm.java.lang.Integer), [])
def check_params(test_self, py_stage, check_params_exist=True):
    """
    Verify that a Python stage's params agree with its Java counterpart.

    Returns without asserting anything if ``py_stage`` lacks ``_to_java`` or
    if ``_to_java()`` returns ``None``. Otherwise the following are asserted
    via ``test_self.assertEqual``:

    - the UID is the same on both sides;
    - optionally (``check_params_exist``), the Python param names match the
      sorted Java param names;
    - each param's parent is the stage UID;
    - both sides agree on whether a param has a default value;
    - default values match, apart from ``seed``, which is skipped.

    Each compared param is cleared on both the Java and the Python stage.
    """
    py_stage_str = "%s %s" % (type(py_stage), py_stage)

    if not hasattr(py_stage, "_to_java"):
        return
    java_stage = py_stage._to_java()
    if java_stage is None:
        return

    test_self.assertEqual(py_stage.uid, java_stage.uid(), msg=py_stage_str)

    if check_params_exist:
        names_in_python = [param.name for param in py_stage.params]
        names_in_java = [jparam.name() for jparam in list(java_stage.params())]
        test_self.assertEqual(
            names_in_python,
            sorted(names_in_java),
            "Param list in Python does not match Java for %s:\nJava = %s\nPython = %s"
            % (py_stage_str, names_in_java, names_in_python),
        )

    for param in py_stage.params:
        test_self.assertEqual(param.parent, py_stage.uid)

        jparam = java_stage.getParam(param.name)
        python_side = py_stage.hasDefault(param)
        java_side = java_stage.hasDefault(jparam)
        default_msg = "Default value mismatch of param %s for Params %s" % (
            param.name,
            str(py_stage),
        )
        test_self.assertEqual(python_side, java_side, default_msg)

        if not python_side:
            continue
        if param.name == "seed":
            # Random seeds between Spark and PySpark are different
            continue

        expected = _java2py(
            test_self.sc,
            java_stage.clear(jparam).getOrDefault(jparam),
        )
        py_stage._clear(param)
        actual = py_stage.getOrDefault(param)

        # An equality test on NaN is always False, so compare string markers
        # when the Java default is NaN.
        if isinstance(expected, float) and np.isnan(expected):
            expected = "NaN"
            actual = "NaN" if np.isnan(actual) else "not NaN"

        test_self.assertEqual(
            expected,
            actual,
            "Java default %s != python default %s of param %s for Params %s"
            % (str(expected), str(actual), param.name, str(py_stage)),
        )