Example #1
    def _from_java_impl(cls, java_stage):
        """
        Return Python estimator, estimatorParamMaps, and evaluator from a Java ValidatorParams.
        """

        # Load information from java_stage to the instance.
        estimator = JavaParams._from_java(java_stage.getEstimator())
        evaluator = JavaParams._from_java(java_stage.getEvaluator())
        epms = [estimator._transfer_param_map_from_java(epm)
                for epm in java_stage.getEstimatorParamMaps()]
        return estimator, epms, evaluator
Example #2
    def _from_java(cls, java_stage):
        """
        Given a Java CrossValidatorModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """
        bestModel = JavaParams._from_java(java_stage.bestModel())
        estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage)

        py_stage = cls(bestModel=bestModel).setEstimator(estimator)
        py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator)

        if java_stage.hasSubModels():
            py_stage.subModels = [[JavaParams._from_java(sub_model)
                                   for sub_model in fold_sub_models]
                                  for fold_sub_models in java_stage.subModels()]

        py_stage._resetUid(java_stage.uid())
        return py_stage
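This hook is not normally called directly: it runs when a fitted model goes through the standard ML persistence round trip. A minimal sketch, assuming a fitted cvModel obtained from CrossValidator.fit() and a writable path:

    from pyspark.ml.tuning import CrossValidatorModel

    # Saving drives _to_java on the Python side; loading instantiates the Java
    # CrossValidatorModel and hands it to the _from_java shown above.
    cvModel.write().overwrite().save(path)
    restored = CrossValidatorModel.load(path)
    print(restored.bestModel)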
Example #3
    def _from_java(cls, java_stage):
        """
        Given a Java TrainValidationSplitModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """

        # Load information from java_stage to the instance.
        bestModel = JavaParams._from_java(java_stage.bestModel())
        estimator, epms, evaluator = super(TrainValidationSplitModel,
                                           cls)._from_java_impl(java_stage)
        # Create a new instance of this stage.
        py_stage = cls(bestModel=bestModel).setEstimator(estimator)
        py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator)

        if java_stage.hasSubModels():
            py_stage.subModels = [JavaParams._from_java(sub_model)
                                  for sub_model in java_stage.subModels()]

        py_stage._resetUid(java_stage.uid())
        return py_stage
Example #4
    def _from_java(cls, java_stage):
        """
        Given a Java TrainValidationSplitModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """

        # Load information from java_stage to the instance.
        bestModel = JavaParams._from_java(java_stage.bestModel())
        estimator, epms, evaluator = super(TrainValidationSplitModel,
                                           cls)._from_java_impl(java_stage)
        # Create a new instance of this stage.
        py_stage = cls(bestModel=bestModel).setEstimator(estimator)
        py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator)

        if java_stage.hasSubModels():
            py_stage.subModels = [JavaParams._from_java(sub_model)
                                  for sub_model in java_stage.subModels()]

        py_stage._resetUid(java_stage.uid())
        return py_stage
Example #5
 def _from_java(cls, java_stage):
     """
     Given a Java PipelineModel, create and return a Python wrapper of it.
     Used for ML persistence.
     """
     # Load information from java_stage to the instance.
     py_stages = [JavaParams._from_java(s) for s in java_stage.stages()]
     # Create a new instance of this stage.
     py_stage = cls(py_stages)
     py_stage._resetUid(java_stage.uid())
     return py_stage
Example #6
 def _from_java(cls, java_stage):
     """
     Given a Java PipelineModel, create and return a Python wrapper of it.
     Used for ML persistence.
     """
     # Load information from java_stage to the instance.
     py_stages = [JavaParams._from_java(s) for s in java_stage.stages()]
     # Create a new instance of this stage.
     py_stage = cls(py_stages)
     py_stage._resetUid(java_stage.uid())
     return py_stage
Example #7
    def _from_java_impl(cls, java_stage):
        """
        Return Python estimator, estimatorParamMaps, and evaluator from a Java ValidatorParams.
        """

        # Load information from java_stage to the instance.
        estimator = JavaParams._from_java(java_stage.getEstimator())
        evaluator = JavaParams._from_java(java_stage.getEvaluator())
        if isinstance(estimator, JavaEstimator):
            epms = [
                estimator._transfer_param_map_from_java(epm)
                for epm in java_stage.getEstimatorParamMaps()
            ]
        elif MetaAlgorithmReadWrite.isMetaEstimator(estimator):
            # Meta estimator such as Pipeline, OneVsRest
            epms = _ValidatorSharedReadWrite.meta_estimator_transfer_param_maps_from_java(
                estimator, java_stage.getEstimatorParamMaps())
        else:
            raise ValueError('Unsupported estimator used in tuning: ' +
                             str(estimator))

        return estimator, epms, evaluator
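The meta-estimator branch matters because the tuned estimator can itself be a Pipeline (or OneVsRest) whose param maps point at params owned by inner stages. A sketch of the kind of setup that exercises it, using standard pyspark.ml classes (column names are placeholders):

    from pyspark.ml import Pipeline
    from pyspark.ml.classification import LogisticRegression
    from pyspark.ml.feature import HashingTF, Tokenizer
    from pyspark.ml.tuning import ParamGridBuilder

    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashingTF = HashingTF(inputCol="words", outputCol="features")
    lr = LogisticRegression(maxIter=10)
    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])

    # The grid references params of the inner stages, not of the Pipeline itself,
    # which is why a plain estimator._transfer_param_map_from_java is not enough.
    grid = ParamGridBuilder() \
        .addGrid(hashingTF.numFeatures, [10, 100]) \
        .addGrid(lr.regParam, [0.1, 0.01]) \
        .build()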
Example #8
 def loadNativeModelFromString(model,
                               labelColName="label",
                               featuresColName="features",
                               predictionColName="prediction"):
     """
     Load the model from a native LightGBM model string.
     """
     ctx = SparkContext._active_spark_context
     loader = ctx._jvm.com.microsoft.ml.spark.LightGBMRegressionModel
     java_model = loader.loadNativeModelFromString(model, labelColName,
                                                   featuresColName,
                                                   predictionColName)
     return JavaParams._from_java(java_model)
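A usage sketch; the com.microsoft.ml.spark package shown here is the older MMLSpark namespace (newer SynapseML releases relocate the class), and the file path and test_df below are placeholders:

    # A native LightGBM model string, e.g. the contents of a booster text file
    # produced by LightGBM itself or by a previously saved Spark LightGBM model.
    with open("/tmp/lightgbm_model.txt") as f:
        model_str = f.read()

    model = loadNativeModelFromString(model_str)
    predictions = model.transform(test_df)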
Example #9
 def loadNativeModelFromFile(filename,
                             labelColName="label",
                             featuresColName="features",
                             predictionColName="prediction"):
     """
     Load the model from a native LightGBM text file.
     """
     ctx = SparkContext._active_spark_context
     loader = ctx._jvm.com.microsoft.ml.spark.LightGBMRankerModel
     java_model = loader.loadNativeModelFromFile(filename, labelColName,
                                                 featuresColName,
                                                 predictionColName)
     return JavaParams._from_java(java_model)
Example #10
 def _from_java(cls, java_stage: "JavaObject") -> "Pipeline":
     """
     Given a Java Pipeline, create and return a Python wrapper of it.
     Used for ML persistence.
     """
     # Create a new instance of this stage.
     py_stage = cls()
     # Load information from java_stage to the instance.
     py_stages: List["PipelineStage"] = [
         JavaParams._from_java(s) for s in java_stage.getStages()
     ]
     py_stage.setStages(py_stages)
     py_stage._resetUid(java_stage.uid())
     return py_stage
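In practice this is reached through Pipeline.load(), which reads the Java Pipeline from disk and wraps each stage. A minimal round trip, assuming a pipeline built from Java-backed stages and a writable path:

    from pyspark.ml import Pipeline

    pipeline.write().overwrite().save(path)   # calls _to_java() on every stage
    restored = Pipeline.load(path)            # ends up in the _from_java shown above
    assert restored.uid == pipeline.uid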
Example #11
    def _from_java(cls, java_stage):
        """
        Given a Java TrainValidationSplitModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """

        # Load information from java_stage to the instance.
        sc = SparkContext._active_spark_context
        bestModel = JavaParams._from_java(java_stage.bestModel())
        validationMetrics = _java2py(sc, java_stage.validationMetrics())
        estimator, epms, evaluator = super(TrainValidationSplitModel,
                                           cls)._from_java_impl(java_stage)
        # Create a new instance of this stage.
        py_stage = cls(bestModel=bestModel,
                       validationMetrics=validationMetrics)._set(estimator=estimator)
        py_stage = py_stage._set(estimatorParamMaps=epms)._set(evaluator=evaluator)

        if java_stage.hasSubModels():
            py_stage.subModels = [JavaParams._from_java(sub_model)
                                  for sub_model in java_stage.subModels()]

        py_stage._resetUid(java_stage.uid())
        return py_stage
Example #12
    def _from_java(cls, java_stage):
        """
        Given a Java CrossValidatorModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """
        sc = SparkContext._active_spark_context
        bestModel = JavaParams._from_java(java_stage.bestModel())
        avgMetrics = _java2py(sc, java_stage.avgMetrics())
        estimator, epms, evaluator = super(CrossValidatorModel,
                                           cls)._from_java_impl(java_stage)

        py_stage = cls(bestModel=bestModel,
                       avgMetrics=avgMetrics).setEstimator(estimator)
        py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator)

        if java_stage.hasSubModels():
            py_stage.subModels = [[
                JavaParams._from_java(sub_model)
                for sub_model in fold_sub_models
            ] for fold_sub_models in java_stage.subModels()]

        py_stage._resetUid(java_stage.uid())
        return py_stage
Example #13
    def _from_java(cls, java_stage):
        """
        Given a Java CrossValidatorModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """

        bestModel = JavaParams._from_java(java_stage.bestModel())
        estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage)

        py_stage = cls(bestModel=bestModel).setEstimator(estimator)
        py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator)

        py_stage._resetUid(java_stage.uid())
        return py_stage
Example #14
    def _from_java(cls, java_stage):
        """
        Given a Java CrossValidatorModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """

        # Load information from java_stage to the instance.
        bestModel = JavaParams._from_java(java_stage.bestModel())
        estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage)
        # Create a new instance of this stage.
        py_stage = cls(bestModel=bestModel)\
            .setEstimator(estimator).setEstimatorParamMaps(epms).setEvaluator(evaluator)
        py_stage._resetUid(java_stage.uid())
        return py_stage
Example #15
    def _from_java(cls, java_stage):
        """
        Given a Java CrossValidatorModel, create and return a Python wrapper of it.
        Used for ML persistence.
        """

        # Load information from java_stage to the instance.
        bestModel = JavaParams._from_java(java_stage.bestModel())
        estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage)
        # Create a new instance of this stage.
        py_stage = cls(bestModel=bestModel)\
            .setEstimator(estimator).setEstimatorParamMaps(epms).setEvaluator(evaluator)
        py_stage._resetUid(java_stage.uid())
        return py_stage
Example #16
    def _to_java(self):
        estimator, epms, evaluator = _ValidatorParams._to_java_impl(self)

        _java_obj = JavaParams._new_java_obj(
            "com.microsoft.azure.synapse.ml.recommendation.RankingTrainValidationSplit",
            self.uid)
        _java_obj.setEstimatorParamMaps(epms)
        _java_obj.setEvaluator(evaluator)
        _java_obj.setEstimator(estimator)
        _java_obj.setTrainRatio(self.getTrainRatio())
        _java_obj.setSeed(self.getSeed())
        _java_obj.setItemCol(self.getItemCol())
        _java_obj.setUserCol(self.getUserCol())
        _java_obj.setRatingCol(self.getRatingCol())

        return _java_obj
Example #17
 def _to_java(self):
     """
     Convert this instance to a dill dump, then to a list of strings holding the Unicode integer value of each character.
     Use this list as a set of dummy stop words and store it in a StopWordsRemover instance.
     :return: Java object equivalent to this instance.
     """
     dmp = dill.dumps(self)
     pylist = [str(b) for b in bytearray(dmp)]  # convert the pickled bytes to a list of integer strings
     pylist.append(PysparkObjId._getPyObjId())  # add our id so PysparkPipelineWrapper can id us.
     sc = SparkContext._active_spark_context
     java_class = sc._gateway.jvm.java.lang.String
     java_array = sc._gateway.new_array(java_class, len(pylist))
     for i in range(len(pylist)):
         java_array[i] = pylist[i]
     _java_obj = JavaParams._new_java_obj(PysparkObjId._getCarrierClass(javaName=True), self.uid)
     _java_obj.setStopWords(java_array)
     return _java_obj
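The matching read path (not shown in this example) reverses the trick: pull the stop words back out of the carrier StopWordsRemover, drop the id marker, and un-pickle. A rough sketch of that inverse step; _from_carrier is a hypothetical helper mirroring the names above:

    import dill

    def _from_carrier(java_obj):
        # Hypothetical inverse of the _to_java trick above.
        words = list(java_obj.getStopWords())
        words.pop()                            # drop the PysparkObjId marker appended above
        raw = bytes(bytearray(int(w) for w in words))
        return dill.loads(raw)                 # rebuild the original Python object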
Example #18
    def _to_java(self):
        """
        Transfer this instance to a Java PipelineModel.  Used for ML persistence.

        :return: Java object equivalent to this instance.
        """

        gateway = SparkContext._gateway
        cls = SparkContext._jvm.org.apache.spark.ml.Transformer
        java_stages = gateway.new_array(cls, len(self.stages))
        for idx, stage in enumerate(self.stages):
            java_stages[idx] = stage._to_java()

        _java_obj =\
            JavaParams._new_java_obj("org.apache.spark.ml.PipelineModel", self.uid, java_stages)

        return _java_obj
Example #19
    def _to_java(self):
        """
        Transfer this instance to a Java CrossValidator. Used for ML persistence.

        :return: Java object equivalent to this instance.
        """

        estimator, epms, evaluator = super(CrossValidator, self)._to_java_impl()

        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidator", self.uid)
        _java_obj.setEstimatorParamMaps(epms)
        _java_obj.setEvaluator(evaluator)
        _java_obj.setEstimator(estimator)
        _java_obj.setSeed(self.getSeed())
        _java_obj.setNumFolds(self.getNumFolds())

        return _java_obj
Example #20
    def _to_java(self):
        """
        Transfer this instance to a Java CrossValidatorModel. Used for ML persistence.

        :return: Java object equivalent to this instance.
        """

        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidatorModel",
                                             self.uid,
                                             self.bestModel._to_java(),
                                             self.avgMetrics)
        estimator, epms, evaluator = super(CrossValidatorModel, self)._to_java_impl()

        _java_obj.set("evaluator", evaluator)
        _java_obj.set("estimator", estimator)
        _java_obj.set("estimatorParamMaps", epms)
        return _java_obj
Example #21
    def _to_java(self):
        """
        Transfer this instance to a Java PipelineModel.  Used for ML persistence.

        :return: Java object equivalent to this instance.
        """

        gateway = SparkContext._gateway
        cls = SparkContext._jvm.org.apache.spark.ml.Transformer
        java_stages = gateway.new_array(cls, len(self.stages))
        for idx, stage in enumerate(self.stages):
            java_stages[idx] = stage._to_java()

        _java_obj =\
            JavaParams._new_java_obj("org.apache.spark.ml.PipelineModel", self.uid, java_stages)

        return _java_obj
Example #22
    def _to_java(self):
        """
        Transfer this instance to a Java CrossValidator. Used for ML persistence.

        :return: Java object equivalent to this instance.
        """

        estimator, epms, evaluator = super(CrossValidator, self)._to_java_impl()

        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidator", self.uid)
        _java_obj.setEstimatorParamMaps(epms)
        _java_obj.setEvaluator(evaluator)
        _java_obj.setEstimator(estimator)
        _java_obj.setSeed(self.getSeed())
        _java_obj.setNumFolds(self.getNumFolds())

        return _java_obj
Example #23
    def _to_java(self):
        """
        Transfer this instance to a Java TrainValidationSplit. Used for ML persistence.
        :return: Java object equivalent to this instance.
        """

        estimator, epms, evaluator = super(TrainValidationSplit, self)._to_java_impl()

        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.TrainValidationSplit",
                                             self.uid)
        _java_obj.setEstimatorParamMaps(epms)
        _java_obj.setEvaluator(evaluator)
        _java_obj.setEstimator(estimator)
        _java_obj.setTrainRatio(self.getTrainRatio())
        _java_obj.setSeed(self.getSeed())
        _java_obj.setParallelism(self.getParallelism())

        return _java_obj
Example #24
    def _to_java(self):
        """
        Transfer this instance to a Java TrainValidationSplitModel. Used for ML persistence.
        :return: Java object equivalent to this instance.
        """

        sc = SparkContext._active_spark_context
        # TODO: persist validation metrics as well
        _java_obj = JavaParams._new_java_obj(
            "org.apache.spark.ml.tuning.TrainValidationSplitModel",
            self.uid,
            self.bestModel._to_java(),
            _py2java(sc, []))
        estimator, epms, evaluator = super(TrainValidationSplitModel, self)._to_java_impl()

        _java_obj.set("evaluator", evaluator)
        _java_obj.set("estimator", estimator)
        _java_obj.set("estimatorParamMaps", epms)
        return _java_obj
Example #25
 def _transfer_param_map_from_java(self, javaParamMap):
     """
     Transforms a Java ParamMap into a Python ParamMap.
     """
     sc = SparkContext._active_spark_context
     paramMap = dict()
     for pair in javaParamMap.toList():
         param = pair.param()
         if self.hasParam(str(param.name())):
             java_obj = pair.value()
             if sc._jvm.Class.forName("org.apache.spark.ml.PipelineStage").isInstance(java_obj):
                 # Note: JavaParams._from_java supports both JavaEstimator/JavaTransformer classes
                 # and Estimator/Transformer classes that implement the `_from_java` static method
                 # (such as the OneVsRest and Pipeline classes).
                 py_obj = JavaParams._from_java(java_obj)
             else:
                 py_obj = _java2py(sc, java_obj)
             paramMap[self.getParam(param.name())] = py_obj
     return paramMap
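The result is a plain Python dict keyed by this stage's own Param objects, the same shape that ParamGridBuilder produces. For orientation (lr is just a placeholder LogisticRegression):

    from pyspark.ml.classification import LogisticRegression

    lr = LogisticRegression()
    # What a transferred ParamMap looks like on the Python side:
    epm = {lr.regParam: 0.1, lr.maxIter: 10}
    print({p.name: v for p, v in epm.items()})   # {'regParam': 0.1, 'maxIter': 10}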
Example #26
    def _to_java(self):
        """
        Transfer this instance to a Java TrainValidationSplit. Used for ML persistence.
        :return: Java object equivalent to this instance.
        """

        estimator, epms, evaluator = super(RankingTrainValidationSplit, self)._to_java_impl()

        _java_obj = JavaParams._new_java_obj("com.microsoft.ml.spark.RankingTrainValidationSplit",
                                             self.uid)
        _java_obj.setEstimatorParamMaps(epms)
        _java_obj.setEvaluator(evaluator)
        _java_obj.setEstimator(estimator)
        _java_obj.setTrainRatio(self.getTrainRatio())
        _java_obj.setSeed(self.getSeed())
        _java_obj.setItemCol(self.getItemCol())
        _java_obj.setUserCol(self.getUserCol())
        _java_obj.setRatingCol(self.getRatingCol())

        return _java_obj
Example #27
    def _to_java(self):
        """
        Transfer this instance to a Java Pipeline.  Used for ML persistence.

        Returns
        -------
        py4j.java_gateway.JavaObject
            Java object equivalent to this instance.
        """

        gateway = SparkContext._gateway
        cls = SparkContext._jvm.org.apache.spark.ml.PipelineStage
        java_stages = gateway.new_array(cls, len(self.getStages()))
        for idx, stage in enumerate(self.getStages()):
            java_stages[idx] = stage._to_java()

        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.Pipeline", self.uid)
        _java_obj.setStages(java_stages)

        return _java_obj
Example #28
    def _to_java(self):
        """
        Transfer this instance to a Java TrainValidationSplitModel. Used for ML persistence.
        :return: Java object equivalent to this instance.
        """

        sc = SparkContext._active_spark_context
        _java_obj = JavaParams._new_java_obj(
            "org.apache.spark.ml.tuning.TrainValidationSplitModel",
            self.uid,
            self.bestModel._to_java(),
            _py2java(sc, self.validationMetrics))
        estimator, epms, evaluator = super(TrainValidationSplitModel, self)._to_java_impl()

        _java_obj.set("evaluator", evaluator)
        _java_obj.set("estimator", estimator)
        _java_obj.set("estimatorParamMaps", epms)

        if self.subModels is not None:
            java_sub_models = [sub_model._to_java() for sub_model in self.subModels]
            _java_obj.setSubModels(java_sub_models)

        return _java_obj
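The subModels branch only has something to persist when sub-model collection was requested at fit time via the standard collectSubModels flag. Roughly, with lr, grid, evaluator and train_df as placeholders:

    from pyspark.ml.tuning import TrainValidationSplit

    tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid,
                               evaluator=evaluator, collectSubModels=True)
    model = tvs.fit(train_df)
    # One sub-model per param map; these are what setSubModels() saves above.
    print(len(model.subModels))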
Example #29
def _bucketize(df, input_cols):
    def j_str_arr(arr):
        gateway = SparkContext._gateway
        j_str = gateway.jvm.java.lang.String
        j_arr = gateway.new_array(j_str, len(arr))
        for i, val in enumerate(arr):
            j_arr[i] = val
        return j_arr

    output_cols = ['{}-bucketed'.format(x) for x in input_cols]
    # Sadly, the multi-column versions are only available in Scala;
    # PySpark doesn't have them yet.
    j_bucketizer = (JavaParams._new_java_obj(
        "org.apache.spark.ml.feature.QuantileDiscretizer").setInputCols(
            j_str_arr(input_cols)).setOutputCols(
                j_str_arr(output_cols)).setNumBuckets(254).setRelativeError(
                    1 / 2550).setHandleInvalid('error').fit(df._jdf))
    j_df_bucketized = j_bucketizer.transform(df._jdf)
    df_bucketized = DataFrame(j_df_bucketized, df.sql_ctx).drop(*input_cols)
    # Now we need to assemble the bucketized values into vector
    # form for the feature selector to work with.
    assembler = VectorAssembler(inputCols=output_cols, outputCol='features')
    return assembler.transform(df_bucketized).drop(*output_cols)
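A quick way to exercise this helper; it assumes an active SparkSession named spark plus the SparkContext, DataFrame, VectorAssembler and JavaParams imports already used above, and the columns are made up:

    df = spark.createDataFrame(
        [(float(i), float(i * i)) for i in range(1000)],
        ["a", "b"])
    featurized = _bucketize(df, ["a", "b"])
    featurized.select("features").show(3, truncate=False)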
Example #30
    def _to_java(self):
        """
        Transfer this instance to a Java CrossValidatorModel. Used for ML persistence.

        Returns
        -------
        py4j.java_gateway.JavaObject
            Java object equivalent to this instance.
        """

        sc = SparkContext._active_spark_context
        _java_obj = JavaParams._new_java_obj(
            "org.apache.spark.ml.tuning.CrossValidatorModel", self.uid,
            self.bestModel._to_java(), _py2java(sc, self.avgMetrics))
        estimator, epms, evaluator = super(CrossValidatorModel,
                                           self)._to_java_impl()

        params = {
            "evaluator": evaluator,
            "estimator": estimator,
            "estimatorParamMaps": epms,
            "numFolds": self.getNumFolds(),
            "foldCol": self.getFoldCol(),
            "seed": self.getSeed(),
        }
        for param_name, param_val in params.items():
            java_param = _java_obj.getParam(param_name)
            pair = java_param.w(param_val)
            _java_obj.set(pair)

        if self.subModels is not None:
            java_sub_models = [[
                sub_model._to_java() for sub_model in fold_sub_models
            ] for fold_sub_models in self.subModels]
            _java_obj.setSubModels(java_sub_models)
        return _java_obj
Example #31
    def _to_java(self):
        """
        Transfer this instance to a Java CrossValidatorModel. Used for ML persistence.

        :return: Java object equivalent to this instance.
        """

        sc = SparkContext._active_spark_context
        # TODO: persist average metrics as well
        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidatorModel",
                                             self.uid,
                                             self.bestModel._to_java(),
                                             _py2java(sc, []))
        estimator, epms, evaluator = super(CrossValidatorModel, self)._to_java_impl()

        _java_obj.set("evaluator", evaluator)
        _java_obj.set("estimator", estimator)
        _java_obj.set("estimatorParamMaps", epms)

        if self.subModels is not None:
            java_sub_models = [[sub_model._to_java() for sub_model in fold_sub_models]
                               for fold_sub_models in self.subModels]
            _java_obj.setSubModels(java_sub_models)
        return _java_obj
Example #32
    def _to_java(self):
        """
        Transfer this instance to a Java CrossValidatorModel. Used for ML persistence.

        :return: Java object equivalent to this instance.
        """

        sc = SparkContext._active_spark_context
        # TODO: persist average metrics as well
        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidatorModel",
                                             self.uid,
                                             self.bestModel._to_java(),
                                             _py2java(sc, []))
        estimator, epms, evaluator = super(CrossValidatorModel, self)._to_java_impl()

        _java_obj.set("evaluator", evaluator)
        _java_obj.set("estimator", estimator)
        _java_obj.set("estimatorParamMaps", epms)

        if self.subModels is not None:
            java_sub_models = [[sub_model._to_java() for sub_model in fold_sub_models]
                               for fold_sub_models in self.subModels]
            _java_obj.setSubModels(java_sub_models)
        return _java_obj
Example #33
 def getBestModel(self):
     """
     Returns the best model.
     """
     return JavaParams._from_java(self._java_obj.getBestModel())
Example #34
 def getModel(self):
     """
     Get the underlying model.
     """
     return JavaParams._from_java(self._java_obj.getModel())