class H2OCommonSupervisedParams(H2OCommonParams):
    """Extends ``H2OCommonParams`` with the ``labelCol`` param, its getter and its setter."""

    # Declared with H2OTypeConverters.toString() as the type converter.
    labelCol = Param(Params._dummy(), "labelCol", "Label column name", H2OTypeConverters.toString())

    def getLabelCol(self):
        """Return the ``labelCol`` value as resolved by ``getOrDefault``."""
        label_param = self.labelCol
        return self.getOrDefault(label_param)

    def setLabelCol(self, value):
        """Pass ``value`` to ``_set`` under ``labelCol`` and return the result of ``_set``."""
        outcome = self._set(labelCol=value)
        return outcome
# Example #2
# 0
class HasValidationLabelCol(Params):
    """Mixin declaring the ``validationLabelCol`` param together with its getter and setter."""

    # Marked "(experimental)" in its own help text; declared with the string converter.
    validationLabelCol = Param(
        Params._dummy(),
        "validationLabelCol",
        "(experimental) Name of the label column in the validation data frame. "
        "The label column should be a string column with two distinct values indicating the anomaly. "
        "The negative value must be alphabetically smaller than the positive value. (E.g. '0'/'1', 'False'/'True')",
        H2OTypeConverters.toString(),
    )

    def getValidationLabelCol(self):
        """Return the ``validationLabelCol`` value as resolved by ``getOrDefault``."""
        validation_label_param = self.validationLabelCol
        return self.getOrDefault(validation_label_param)

    def setValidationLabelCol(self, value):
        """Pass ``value`` to ``_set`` under ``validationLabelCol`` and return the result of ``_set``."""
        outcome = self._set(validationLabelCol=value)
        return outcome
class H2OMOJOAlgoSharedParams(Params):
    """Declares prediction-related params and a read-only getter for each of them.

    No setters are defined in this class; every getter delegates to ``getOrDefault``.
    """

    # --- Param definitions: string-valued column names ---
    predictionCol = Param(Params._dummy(), "predictionCol", "Prediction column name", H2OTypeConverters.toString())

    detailedPredictionCol = Param(
        Params._dummy(), "detailedPredictionCol",
        "Column containing additional prediction details, its content depends on the model type.",
        H2OTypeConverters.toString())

    # --- Param definitions: boolean switch for the detailed column ---
    withDetailedPredictionCol = Param(
        Params._dummy(), "withDetailedPredictionCol",
        "Enables or disables generating additional prediction column, but with more details",
        H2OTypeConverters.toBoolean())

    # --- Param definitions: list of feature column names ---
    featuresCols = Param(Params._dummy(), "featuresCols", "Name of feature columns", H2OTypeConverters.toListString())

    # --- Param definitions: boolean conversion / output-layout switches ---
    convertUnknownCategoricalLevelsToNa = Param(
        Params._dummy(), "convertUnknownCategoricalLevelsToNa",
        "If set to 'true', the model converts unknown categorical levels to NA during making predictions.",
        H2OTypeConverters.toBoolean())

    convertInvalidNumbersToNa = Param(
        Params._dummy(), "convertInvalidNumbersToNa",
        "If set to 'true', the model converts invalid numbers to NA during making predictions.",
        H2OTypeConverters.toBoolean())

    namedMojoOutputColumns = Param(
        Params._dummy(), "namedMojoOutputColumns",
        "Mojo Output is not stored in the array but in the properly named columns",
        H2OTypeConverters.toBoolean())

    # --- Getters ---
    def getPredictionCol(self):
        """Return the ``predictionCol`` value as resolved by ``getOrDefault``."""
        param = self.predictionCol
        return self.getOrDefault(param)

    def getDetailedPredictionCol(self):
        """Return the ``detailedPredictionCol`` value as resolved by ``getOrDefault``."""
        param = self.detailedPredictionCol
        return self.getOrDefault(param)

    def getWithDetailedPredictionCol(self):
        """Return the ``withDetailedPredictionCol`` value as resolved by ``getOrDefault``."""
        param = self.withDetailedPredictionCol
        return self.getOrDefault(param)

    def getFeaturesCols(self):
        """Return the ``featuresCols`` value as resolved by ``getOrDefault``."""
        param = self.featuresCols
        return self.getOrDefault(param)

    def getConvertUnknownCategoricalLevelsToNa(self):
        """Return the ``convertUnknownCategoricalLevelsToNa`` value as resolved by ``getOrDefault``."""
        param = self.convertUnknownCategoricalLevelsToNa
        return self.getOrDefault(param)

    def getConvertInvalidNumbersToNa(self):
        """Return the ``convertInvalidNumbersToNa`` value as resolved by ``getOrDefault``."""
        param = self.convertInvalidNumbersToNa
        return self.getOrDefault(param)

    def getNamedMojoOutputColumns(self):
        """Return the ``namedMojoOutputColumns`` value as resolved by ``getOrDefault``."""
        param = self.namedMojoOutputColumns
        return self.getOrDefault(param)
class H2OTargetEncoderParams(Params):
    """Declares the target-encoder params and a getter for each of them.

    No setters are defined in this class. All getters delegate to
    ``getOrDefault`` except ``getOutputCols``, which derives names from the
    input columns when ``outputCols`` resolves to an empty/falsy value.
    """

    ##
    # Param definitions
    ##
    foldCol = Param(Params._dummy(), "foldCol", "Fold column name",
                    H2OTypeConverters.toNullableString())

    labelCol = Param(Params._dummy(), "labelCol", "Label column name",
                     H2OTypeConverters.toString())

    inputCols = Param(Params._dummy(), "inputCols",
                      "Names of columns that will be transformed",
                      H2OTypeConverters.toListString())

    outputCols = Param(
        Params._dummy(), "outputCols",
        "Names of columns representing the result of target encoding",
        H2OTypeConverters.toListString())

    # The help text previously ended with an unbalanced "(" - the closing ")" was missing.
    holdoutStrategy = Param(
        Params._dummy(), "holdoutStrategy",
        """A strategy deciding what records will be excluded when calculating the target average on the training dataset.
           Options:
            None        - All rows are considered for the calculation
            LeaveOneOut - All rows except the row the calculation is made for
            KFold       - Only out-of-fold data is considered (The option requires foldCol to be set.)""",
        H2OTypeConverters.toEnumString(
            "ai.h2o.targetencoding.TargetEncoder$DataLeakageHandlingStrategy"))

    blendedAvgEnabled = Param(
        Params._dummy(), "blendedAvgEnabled",
        "If set, the target average becomes a weighted average of the posterior average for a given "
        "categorical level and the prior average of the target. The weight is determined by the size "
        "of the given group that the row belongs to. By default, the blended average is disabled.",
        H2OTypeConverters.toBoolean())

    blendedAvgInflectionPoint = Param(
        Params._dummy(), "blendedAvgInflectionPoint",
        "A parameter of the blended average. The bigger number is set, the groups relatively bigger to the "
        "overall data set size will consider the global target value as a component in the weighted average. "
        "The default value is 10.",
        H2OTypeConverters.toFloat())

    blendedAvgSmoothing = Param(
        Params._dummy(), "blendedAvgSmoothing",
        "A parameter of blended average. Controls the rate of transition between a group target value "
        "and a global target value. The default value is 20.",
        H2OTypeConverters.toFloat())

    noise = Param(
        Params._dummy(), "noise",
        "Amount of random noise added to output values. The default value is 0.01",
        H2OTypeConverters.toFloat())

    noiseSeed = Param(Params._dummy(), "noiseSeed",
                      "A seed of the generator producing the random noise",
                      H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getFoldCol(self):
        """Return the ``foldCol`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.foldCol)

    def getLabelCol(self):
        """Return the ``labelCol`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.labelCol)

    def getInputCols(self):
        """Return the ``inputCols`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.inputCols)

    def getOutputCols(self):
        """Return the output column names.

        When ``outputCols`` resolves to a truthy value it is returned as is.
        Otherwise a new list is built by appending ``"_te"`` to every name
        returned by ``getInputCols()``.
        """
        columns = self.getOrDefault(self.outputCols)
        if columns:
            return columns
        # Nothing configured explicitly: derive one output name per input column.
        return [column + "_te" for column in self.getInputCols()]

    def getHoldoutStrategy(self):
        """Return the ``holdoutStrategy`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.holdoutStrategy)

    def getBlendedAvgEnabled(self):
        """Return the ``blendedAvgEnabled`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.blendedAvgEnabled)

    def getBlendedAvgInflectionPoint(self):
        """Return the ``blendedAvgInflectionPoint`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.blendedAvgInflectionPoint)

    def getBlendedAvgSmoothing(self):
        """Return the ``blendedAvgSmoothing`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.blendedAvgSmoothing)

    def getNoise(self):
        """Return the ``noise`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.noise)

    def getNoiseSeed(self):
        """Return the ``noiseSeed`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.noiseSeed)
# Example #5
# 0
class H2OBaseMOJOParams(Params):
    """Declares MOJO prediction params and a read-only getter for each of them.

    No setters are defined in this class. Every getter delegates to
    ``getOrDefault`` except ``getWithDetailedPredictionCol``, which is
    deprecated and always returns ``True``.
    """

    predictionCol = Param(
        Params._dummy(),
        "predictionCol",
        "Prediction column name",
        H2OTypeConverters.toString())

    detailedPredictionCol = Param(
        Params._dummy(),
        "detailedPredictionCol",
        "Column containing additional prediction details, its content depends on the model type.",
        H2OTypeConverters.toString())

    # Still declared, but its getter below no longer reads it (always returns True).
    withDetailedPredictionCol = Param(
        Params._dummy(),
        "withDetailedPredictionCol",
        "Enables or disables generating additional prediction column, but with more details",
        H2OTypeConverters.toBoolean())

    withContributions = Param(
        Params._dummy(),
        "withContributions",
        "Enables or disables generating a sub-column of detailedPredictionCol containing Shapley values.",
        H2OTypeConverters.toBoolean())

    featuresCols = Param(
        Params._dummy(),
        "featuresCols",
        "Name of feature columns",
        H2OTypeConverters.toListString())

    convertUnknownCategoricalLevelsToNa = Param(
        Params._dummy(),
        "convertUnknownCategoricalLevelsToNa",
        "If set to 'true', the model converts unknown categorical levels to NA during making predictions.",
        H2OTypeConverters.toBoolean())

    convertInvalidNumbersToNa = Param(
        Params._dummy(),
        "convertInvalidNumbersToNa",
        "If set to 'true', the model converts invalid numbers to NA during making predictions.",
        H2OTypeConverters.toBoolean())

    namedMojoOutputColumns = Param(
        Params._dummy(),
        "namedMojoOutputColumns",
        "Mojo Output is not stored in the array but in the properly named columns",
        H2OTypeConverters.toBoolean())

    withLeafNodeAssignments = Param(
        Params._dummy(),
        "withLeafNodeAssignments",
        "Enables or disables computation of leaf node assignments.",
        H2OTypeConverters.toBoolean())

    withStageResults = Param(
        Params._dummy(),
        "withStageResults",
        "Enables or disables computation of stage results.",
        H2OTypeConverters.toBoolean())

    ##
    # Getters
    ##
    def getPredictionCol(self):
        """Return the ``predictionCol`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.predictionCol)

    def getDetailedPredictionCol(self):
        """Return the ``detailedPredictionCol`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.detailedPredictionCol)

    def getWithDetailedPredictionCol(self):
        """Deprecated: emit a ``DeprecationWarning`` and return ``True``.

        The ``withDetailedPredictionCol`` param is not consulted.
        """
        # The sentences were previously joined without a separating space
        # ("...3.34.Detailed..."). stacklevel=2 attributes the warning to the
        # caller of this getter rather than to this line.
        warnings.warn("The method will be removed without a replacement in the version 3.34. "
                      "Detailed prediction column is always enabled.", DeprecationWarning,
                      stacklevel=2)
        return True

    def getWithContributions(self):
        """Return the ``withContributions`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.withContributions)

    def getFeaturesCols(self):
        """Return the ``featuresCols`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.featuresCols)

    def getConvertUnknownCategoricalLevelsToNa(self):
        """Return the ``convertUnknownCategoricalLevelsToNa`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.convertUnknownCategoricalLevelsToNa)

    def getConvertInvalidNumbersToNa(self):
        """Return the ``convertInvalidNumbersToNa`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.convertInvalidNumbersToNa)

    def getNamedMojoOutputColumns(self):
        """Return the ``namedMojoOutputColumns`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.namedMojoOutputColumns)

    def getWithLeafNodeAssignments(self):
        """Return the ``withLeafNodeAssignments`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.withLeafNodeAssignments)

    def getWithStageResults(self):
        """Return the ``withStageResults`` value as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.withStageResults)