Пример #1
0
class ColumnPruner(H2OStageBase, JavaTransformer):
    """Python-side stage backed by the JVM class
    ``ai.h2o.sparkling.ml.features.ColumnPruner``.

    Declares two params, ``keep`` and ``columns``, with their accessors.
    """

    keep = Param(
        Params._dummy(),
        "keep",
        "keep the specified columns in the frame",
        H2OTypeConverters.toBoolean())

    columns = Param(
        Params._dummy(),
        "columns",
        "specified columns",
        H2OTypeConverters.toListString())

    @keyword_only
    def __init__(self, keep=False, columns=[]):
        """Create the backing Java object and apply the supplied keyword args.

        The ``keep`` / ``columns`` arguments are not read directly here; the
        values to apply are obtained through ``Utils.getInputKwargs(self)``.
        NOTE(review): ``columns=[]`` is a mutable default, but this body never
        touches it.
        """
        Initializer.load_sparkling_jar()
        super(ColumnPruner, self).__init__()
        javaClassName = "ai.h2o.sparkling.ml.features.ColumnPruner"
        self._java_obj = self._new_java_obj(javaClassName, self.uid)
        # Defaults are pulled from the Java side before user values are set.
        self._setDefaultValuesFromJava()
        self._set(**Utils.getInputKwargs(self))

    # --- keep ---
    def getKeep(self):
        """Return the value of ``keep`` (explicitly set value or default)."""
        return self.getOrDefault(self.keep)

    def setKeep(self, value):
        """Set ``keep`` and return the result of ``_set``."""
        return self._set(keep=value)

    # --- columns ---
    def getColumns(self):
        """Return the value of ``columns`` (explicitly set value or default)."""
        return self.getOrDefault(self.columns)

    def setColumns(self, value):
        """Set ``columns`` and return the result of ``_set``."""
        return self._set(columns=value)
Пример #2
0
class H2ODRFParams(H2OSharedTreeParams):
    """Adds the ``binomialDoubleTrees`` and ``mtries`` params (with their
    getters and setters) on top of ``H2OSharedTreeParams``.
    """

    ##
    # Param definitions
    ##
    binomialDoubleTrees = Param(
        Params._dummy(),
        "binomialDoubleTrees",
        "In case of binary classification, build 2 times more trees (one per class) - can lead "
        "to higher accuracy.",
        H2OTypeConverters.toBoolean())

    # The description previously lacked its closing parenthesis.
    mtries = Param(
        Params._dummy(),
        "mtries",
        "Number of variables randomly sampled as candidates at each split. If set to -1, defaults "
        "to sqrt{p} for classification and p/3 for regression (where p is the # of predictors)",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getBinomialDoubleTrees(self):
        """Return the value of ``binomialDoubleTrees`` (set value or default)."""
        return self.getOrDefault(self.binomialDoubleTrees)

    def getMtries(self):
        """Return the value of ``mtries`` (set value or default)."""
        return self.getOrDefault(self.mtries)

    ##
    # Setters
    ##
    def setBinomialDoubleTrees(self, value):
        """Set ``binomialDoubleTrees`` and return the result of ``_set``."""
        return self._set(binomialDoubleTrees=value)

    def setMtries(self, value):
        """Set ``mtries`` and return the result of ``_set``."""
        return self._set(mtries=value)
Пример #3
0
class HasStoppingCriteria(Params):
    """Mixin declaring the ``stoppingRounds``, ``stoppingMetric`` and
    ``stoppingTolerance`` params together with their accessors.
    """

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Stopping Rounds",
        H2OTypeConverters.toInt())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Stopping Metric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Stopping Tolerance",
        H2OTypeConverters.toFloat())

    # --- stoppingRounds ---
    def getStoppingRounds(self):
        """Return the value of ``stoppingRounds`` (set value or default)."""
        return self.getOrDefault(self.stoppingRounds)

    def setStoppingRounds(self, value):
        """Set ``stoppingRounds`` and return the result of ``_set``."""
        return self._set(stoppingRounds=value)

    # --- stoppingMetric ---
    def getStoppingMetric(self):
        """Return the value of ``stoppingMetric`` (set value or default)."""
        return self.getOrDefault(self.stoppingMetric)

    def setStoppingMetric(self, value):
        """Set ``stoppingMetric`` and return the result of ``_set``."""
        return self._set(stoppingMetric=value)

    # --- stoppingTolerance ---
    def getStoppingTolerance(self):
        """Return the value of ``stoppingTolerance`` (set value or default)."""
        return self.getOrDefault(self.stoppingTolerance)

    def setStoppingTolerance(self, value):
        """Set ``stoppingTolerance`` and return the result of ``_set``."""
        return self._set(stoppingTolerance=value)
Пример #4
0
class H2OTreeBasedSupervisedMOJOParams(H2OSupervisedMOJOParams):
    """Adds the ``ntrees`` param and its getter; no setter is defined here."""

    ntrees = Param(
        Params._dummy(),
        "ntrees",
        "Number of trees representing the model",
        H2OTypeConverters.toInt())

    def getNtrees(self):
        """Return the value of ``ntrees`` (set value or default)."""
        return self.getOrDefault(self.ntrees)
class H2OSupervisedMOJOParams(H2OMOJOAlgoSharedParams):
    """Adds the ``offsetCol`` param and its getter; no setter is defined here."""

    offsetCol = Param(
        Params._dummy(),
        "offsetCol",
        "Offset column name",
        H2OTypeConverters.toNullableString())

    def getOffsetCol(self):
        """Return the value of ``offsetCol`` (set value or default)."""
        return self.getOrDefault(self.offsetCol)
Пример #6
0
class HasUserY(Params):
    """Mixin declaring the ``userY`` param and its accessors."""

    userY = Param(
        Params._dummy(),
        "userY",
        "User-specified initial matrix Y.",
        H2OTypeConverters.toNullableDataFrame())

    def getUserY(self):
        """Return the value of ``userY`` (set value or default)."""
        return self.getOrDefault(self.userY)

    def setUserY(self, value):
        """Set ``userY`` and return the result of ``_set``."""
        return self._set(userY=value)
Пример #7
0
class HasGamCols(Params):
    """Mixin declaring the ``gamCols`` param and its accessors."""

    gamCols = Param(
        Params._dummy(),
        "gamCols",
        "Predictor column names for gam.",
        H2OTypeConverters.toNullableListString())

    def getGamCols(self):
        """Return the value of ``gamCols`` (set value or default)."""
        return self.getOrDefault(self.gamCols)

    def setGamCols(self, value):
        """Set ``gamCols`` and return the result of ``_set``."""
        return self._set(gamCols=value)
Пример #8
0
class HasRandomCols(Params):
    """Mixin declaring the ``randomCols`` param and its accessors."""

    randomCols = Param(
        Params._dummy(),
        "randomCols",
        "Names of random columns for HGLM.",
        H2OTypeConverters.toNullableListString())

    def getRandomCols(self):
        """Return the value of ``randomCols`` (set value or default)."""
        return self.getOrDefault(self.randomCols)

    def setRandomCols(self, value):
        """Set ``randomCols`` and return the result of ``_set``."""
        return self._set(randomCols=value)
Пример #9
0
class HasIgnoredCols(Params):
    """Mixin declaring the ``ignoredCols`` param and its accessors."""

    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Names of columns to ignore for training.",
        H2OTypeConverters.toNullableListString())

    def getIgnoredCols(self):
        """Return the value of ``ignoredCols`` (set value or default)."""
        return self.getOrDefault(self.ignoredCols)

    def setIgnoredCols(self, value):
        """Set ``ignoredCols`` and return the result of ``_set``."""
        return self._set(ignoredCols=value)
Пример #10
0
class HasBetaConstraints(Params):
    """Mixin declaring the ``betaConstraints`` param and its accessors."""

    betaConstraints = Param(
        Params._dummy(),
        "betaConstraints",
        "Data frame of beta constraints enabling to set special conditions over the model coefficients.",
        H2OTypeConverters.toNullableDataFrame())

    def getBetaConstraints(self):
        """Return the value of ``betaConstraints`` (set value or default)."""
        return self.getOrDefault(self.betaConstraints)

    def setBetaConstraints(self, value):
        """Set ``betaConstraints`` and return the result of ``_set``."""
        return self._set(betaConstraints=value)
Пример #11
0
class HasMonotoneConstraints(Params):
    """Mixin declaring the ``monotoneConstraints`` param and its accessors."""

    monotoneConstraints = Param(
        Params._dummy(),
        "monotoneConstraints",
        "Monotone Constraints - A key must correspond to a feature name and value could be 1 or -1",
        H2OTypeConverters.toDictionaryWithFloatElements())

    def getMonotoneConstraints(self):
        """Return the value of ``monotoneConstraints`` (set value or default)."""
        return self.getOrDefault(self.monotoneConstraints)

    def setMonotoneConstraints(self, value):
        """Set ``monotoneConstraints`` and return the result of ``_set``."""
        return self._set(monotoneConstraints=value)
Пример #12
0
class HasLossByColNames(Params):
    """Mixin declaring the ``lossByColNames`` param and its accessors."""

    lossByColNames = Param(
        Params._dummy(),
        "lossByColNames",
        "Column names for which loss function will be overridden by the 'lossByCol' parameter",
        H2OTypeConverters.toNullableListString())

    def getLossByColNames(self):
        """Return the value of ``lossByColNames`` (set value or default)."""
        return self.getOrDefault(self.lossByColNames)

    def setLossByColNames(self, value):
        """Set ``lossByColNames`` and return the result of ``_set``."""
        return self._set(lossByColNames=value)
class HasQuantileAlpha(Params):
    """Mixin declaring the ``quantileAlpha`` param and its accessors."""

    quantileAlpha = Param(
        Params._dummy(), "quantileAlpha",
        "Desired quantile when performing quantile regression.",
        H2OTypeConverters.toFloat())

    def getQuantileAlpha(self):
        """Return the value of ``quantileAlpha`` (set value or default)."""
        return self.getOrDefault(self.quantileAlpha)

    def setQuantileAlpha(self, value):
        """Set ``quantileAlpha`` and return the result of ``_set``."""
        return self._set(quantileAlpha=value)
Пример #14
0
class HasInitialWeights(Params):
    """Mixin declaring the ``initialWeights`` param and its accessors."""

    initialWeights = Param(
        Params._dummy(),
        "initialWeights",
        "A array of weight matrices to be used for initialization of the neural network. "
        "If this parameter is set, the parameter 'initialBiases' has to be set as well.",
        H2OTypeConverters.toNullableListDenseMatrix())

    def getInitialWeights(self):
        """Return the value of ``initialWeights`` (set value or default)."""
        return self.getOrDefault(self.initialWeights)

    def setInitialWeights(self, value):
        """Set ``initialWeights`` and return the result of ``_set``."""
        return self._set(initialWeights=value)
class HasCalibrationDataFrame(Params):
    """Mixin declaring the ``calibrationDataFrame`` param and its accessors."""

    calibrationDataFrame = Param(
        Params._dummy(),
        "calibrationDataFrame",
        "Calibration data frame for Platt Scaling. "
        "To enable usage of the data frame, set the parameter calibrateModel to True.",
        H2OTypeConverters.toNullableDataFrame())

    def getCalibrationDataFrame(self):
        """Return the value of ``calibrationDataFrame`` (set value or default)."""
        return self.getOrDefault(self.calibrationDataFrame)

    def setCalibrationDataFrame(self, value):
        """Set ``calibrationDataFrame`` and return the result of ``_set``."""
        return self._set(calibrationDataFrame=value)
class H2OCommonSupervisedParams(H2OCommonParams):
    """Extends ``H2OCommonParams`` with the ``labelCol`` param and accessors."""

    labelCol = Param(
        Params._dummy(), "labelCol", "Label column name",
        H2OTypeConverters.toString())

    def getLabelCol(self):
        """Return the value of ``labelCol`` (set value or default)."""
        return self.getOrDefault(self.labelCol)

    def setLabelCol(self, value):
        """Set ``labelCol`` and return the result of ``_set``."""
        return self._set(labelCol=value)
Пример #17
0
class HasInitialBiases(Params):
    """Mixin declaring the ``initialBiases`` param and its accessors."""

    initialBiases = Param(
        Params._dummy(),
        "initialBiases",
        "A array of weight vectors to be used for bias initialization of every network layer. "
        "If this parameter is set, the parameter 'initialWeights' has to be set as well.",
        H2OTypeConverters.toNullableListDenseVector())

    def getInitialBiases(self):
        """Return the value of ``initialBiases`` (set value or default)."""
        return self.getOrDefault(self.initialBiases)

    def setInitialBiases(self, value):
        """Set ``initialBiases`` and return the result of ``_set``."""
        return self._set(initialBiases=value)
Пример #18
0
class HasInteractionPairs(Params):
    """Mixin declaring the ``interactionPairs`` param.

    The accessors below are stubs: the getter always yields ``None`` and the
    setter only emits a warning, so the param value is never read or written
    through them.
    """

    interactionPairs = Param(
        Params._dummy(),
        "interactionPairs",
        "A list of pairwise (first order) column interactions.",
        H2OTypeConverters.toNullableListPairString())

    def getInteractionPairs(self):
        """Return ``None`` unconditionally; the param is not consulted."""
        return None

    def setInteractionPairs(self, value):
        """Warn that interaction pairs are unsupported and return ``self``.

        ``value`` is ignored.
        """
        warnings.warn("Interaction pairs are not supported.")
        return self
Пример #19
0
class HasPlugValues(Params):
    """Mixin declaring the ``plugValues`` param and its accessors."""

    plugValues = Param(
        Params._dummy(),
        "plugValues",
        "A dictionary containing values that will be used to impute missing values of the training/validation frame, "
        "use with conjunction missingValuesHandling = 'PlugValues')",
        H2OTypeConverters.toNullableDictionaryWithAnyElements())

    def getPlugValues(self):
        """Return the value of ``plugValues`` (set value or default)."""
        return self.getOrDefault(self.plugValues)

    def setPlugValues(self, value):
        """Set ``plugValues`` and return the result of ``_set``."""
        return self._set(plugValues=value)
Пример #20
0
class HasValidationLabelCol(Params):
    """Mixin declaring the ``validationLabelCol`` param and its accessors."""

    validationLabelCol = Param(
        Params._dummy(),
        "validationLabelCol",
        "(experimental) Name of the label column in the validation data frame. "
        "The label column should be a string column with two distinct values indicating the anomaly. "
        "The negative value must be alphabetically smaller than the positive value. (E.g. '0'/'1', 'False'/'True')",
        H2OTypeConverters.toString())

    def getValidationLabelCol(self):
        """Return the value of ``validationLabelCol`` (set value or default)."""
        return self.getOrDefault(self.validationLabelCol)

    def setValidationLabelCol(self, value):
        """Set ``validationLabelCol`` and return the result of ``_set``."""
        return self._set(validationLabelCol=value)
Пример #21
0
class HasUserPoints(Params):
    """Mixin declaring the ``userPoints`` param and its accessors."""

    userPoints = Param(
        Params._dummy(),
        "userPoints",
        "This option allows you to specify array of points, where each point represents coordinates of an initial"
        " cluster center. The user-specified"
        " points must have the same number of columns as the training observations. The number of rows must equal"
        " the number of clusters.",
        H2OTypeConverters.toNullableListListFloat())

    def getUserPoints(self):
        """Return the value of ``userPoints`` (set value or default)."""
        return self.getOrDefault(self.userPoints)

    def setUserPoints(self, value):
        """Set ``userPoints`` and return the result of ``_set``."""
        return self._set(userPoints=value)
Пример #22
0
 def getTrainingParams(self):
     """Fetch the training params from the backing Java object and convert them.

     The raw result of ``self._java_obj.getTrainingParams()`` is passed through
     ``H2OTypeConverters.scalaMapStringStringToDictStringAny`` (presumably
     yielding a Python dict - confirm against the converter).
     """
     javaTrainingParams = self._java_obj.getTrainingParams()
     return H2OTypeConverters.scalaMapStringStringToDictStringAny(javaTrainingParams)
class H2OGridSearchParams(H2OCommonSupervisedParams):
    """Params of the grid search stage: the algorithm to tune, the hyper
    parameter map, the search/stopping criteria and the parallelism level.
    """

    ##
    # Param definitions
    ##
    algo = Param(
        Params._dummy(),
        "algo",
        "Algo to run grid search on",
        H2OTypeConverters.toH2OGridSearchSupportedAlgo())

    hyperParameters = Param(
        Params._dummy(),
        "hyperParameters",
        "Grid Search Hyper Params map",
        H2OTypeConverters.toDictionaryWithAnyElements())

    strategy = Param(
        Params._dummy(),
        "strategy",
        "strategy",
        H2OTypeConverters.toEnumString("hex.grid.HyperSpaceSearchCriteria$Strategy"))

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "maxRuntimeSecs",
        H2OTypeConverters.toFloat())

    maxModels = Param(
        Params._dummy(),
        "maxModels",
        "maxModels",
        H2OTypeConverters.toInt())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "stoppingRounds",
        H2OTypeConverters.toInt())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "stoppingTolerance",
        H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "stoppingMetric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    selectBestModelBy = Param(
        Params._dummy(),
        "selectBestModelBy",
        "Specifies the metric which is used for comparing and sorting the models returned by the grid.",
        H2OTypeConverters.toEnumString("ai.h2o.sparkling.ml.internals.H2OMetric"))

    parallelism = Param(
        Params._dummy(), "parallelism",
        """Level of model-building parallelism, the possible values are:
           0 -> H2O selects parallelism level based on cluster configuration, such as number of cores
           1 -> Sequential model building, no parallelism
           n>1 -> n models will be built in parallel if possible""",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getAlgo(self):
        """Return a Python algorithm wrapper around the stored Java algorithm.

        The wrapper class is picked from the Java object's
        ``parameters().algoName()``; the new wrapper takes over the Java
        object's uid, is pointed at the Java object and then has its params
        transferred from it.

        Returns ``None`` when the ``algo`` param resolves to ``None``
        (previously this case failed with ``AttributeError``).

        Raises:
            ValueError: if the algorithm name is not one of GBM, DeepLearning,
                XGBoost, GLM or DRF.
        """
        javaAlgo = self.getOrDefault(self.algo)
        if javaAlgo is None:
            # Nothing to wrap; avoid dereferencing a missing Java object.
            return None

        algoName = javaAlgo.parameters().algoName()
        # The algo classes are imported lazily inside each branch, exactly as
        # before, so nothing is imported at module load time.
        if algoName == "GBM":
            from ai.h2o.sparkling.ml.algos import H2OGBM
            algo = H2OGBM()
        elif algoName == "DeepLearning":
            from ai.h2o.sparkling.ml.algos import H2ODeepLearning
            algo = H2ODeepLearning()
        elif algoName == "XGBoost":
            from ai.h2o.sparkling.ml.algos import H2OXGBoost
            algo = H2OXGBoost()
        elif algoName == "GLM":
            from ai.h2o.sparkling.ml.algos import H2OGLM
            algo = H2OGLM()
        elif algoName == "DRF":
            from ai.h2o.sparkling.ml.algos import H2ODRF
            algo = H2ODRF()
        else:
            raise ValueError('Unsupported algorithm for H2OGridSearch')

        # Re-bind the freshly created wrapper to the existing Java instance.
        algo._resetUid(javaAlgo.uid())
        algo._java_obj = javaAlgo
        algo._transfer_params_from_java()
        return algo

    def getHyperParameters(self):
        return self.getOrDefault(self.hyperParameters)

    def getStrategy(self):
        return self.getOrDefault(self.strategy)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getMaxModels(self):
        return self.getOrDefault(self.maxModels)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getSelectBestModelBy(self):
        return self.getOrDefault(self.selectBestModelBy)

    def getParallelism(self):
        return self.getOrDefault(self.parallelism)

    ##
    # Setters (each one forwards to ``_set`` and returns its result)
    ##
    def setAlgo(self, value):
        return self._set(algo=value)

    def setHyperParameters(self, value):
        return self._set(hyperParameters=value)

    def setStrategy(self, value):
        return self._set(strategy=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setMaxModels(self, value):
        return self._set(maxModels=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setSelectBestModelBy(self, value):
        return self._set(selectBestModelBy=value)

    def setParallelism(self, value):
        return self._set(parallelism=value)
class H2OXGBoostParams(H2OAlgoSupervisedParams):
    """Params of the XGBoost algorithm wrapper, with one getter and one setter
    per param.

    Every getter returns ``getOrDefault`` of its param; every setter forwards
    the value to ``_set`` and returns its result.
    """

    ##
    # Param definitions
    ##
    quietMode = Param(
        Params._dummy(), "quietMode", "Quiet mode",
        H2OTypeConverters.toBoolean())

    ntrees = Param(
        Params._dummy(), "ntrees", "Number of trees",
        H2OTypeConverters.toInt())

    nEstimators = Param(
        Params._dummy(), "nEstimators", "number of estimators",
        H2OTypeConverters.toInt())

    maxDepth = Param(
        Params._dummy(), "maxDepth", "Maximal depth",
        H2OTypeConverters.toInt())

    minRows = Param(
        Params._dummy(), "minRows", "Min rows",
        H2OTypeConverters.toFloat())

    minChildWeight = Param(
        Params._dummy(), "minChildWeight", "minimal child weight",
        H2OTypeConverters.toFloat())

    learnRate = Param(
        Params._dummy(), "learnRate", "learn rate",
        H2OTypeConverters.toFloat())

    eta = Param(
        Params._dummy(), "eta", "eta",
        H2OTypeConverters.toFloat())

    learnRateAnnealing = Param(
        Params._dummy(), "learnRateAnnealing", "Learn Rate Annealing",
        H2OTypeConverters.toFloat())

    sampleRate = Param(
        Params._dummy(), "sampleRate", "Sample rate",
        H2OTypeConverters.toFloat())

    subsample = Param(
        Params._dummy(), "subsample", "subsample",
        H2OTypeConverters.toFloat())

    colSampleRate = Param(
        Params._dummy(), "colSampleRate", "col sample rate",
        H2OTypeConverters.toFloat())

    colSampleByLevel = Param(
        Params._dummy(), "colSampleByLevel", "Col Sample By Level",
        H2OTypeConverters.toFloat())

    # Description fixed: was misspelled "col samle rate".
    colSampleRatePerTree = Param(
        Params._dummy(), "colSampleRatePerTree", "col sample rate per tree",
        H2OTypeConverters.toFloat())

    colSampleByTree = Param(
        Params._dummy(), "colSampleByTree", "col sample by tree",
        H2OTypeConverters.toFloat())

    # Description fixed: was "max abs lead node prediction".
    maxAbsLeafnodePred = Param(
        Params._dummy(), "maxAbsLeafnodePred", "max abs leaf node prediction",
        H2OTypeConverters.toFloat())

    maxDeltaStep = Param(
        Params._dummy(), "maxDeltaStep", "max delta step",
        H2OTypeConverters.toFloat())

    scoreTreeInterval = Param(
        Params._dummy(), "scoreTreeInterval", "score tree interval",
        H2OTypeConverters.toInt())

    initialScoreInterval = Param(
        Params._dummy(), "initialScoreInterval", "Initial Score Interval",
        H2OTypeConverters.toInt())

    scoreInterval = Param(
        Params._dummy(), "scoreInterval", "Score Interval",
        H2OTypeConverters.toInt())

    minSplitImprovement = Param(
        Params._dummy(), "minSplitImprovement", "Min split improvement",
        H2OTypeConverters.toFloat())

    gamma = Param(
        Params._dummy(), "gamma", "gamma",
        H2OTypeConverters.toFloat())

    nthread = Param(
        Params._dummy(), "nthread", "nthread",
        H2OTypeConverters.toInt())

    maxBins = Param(
        Params._dummy(), "maxBins", "nbins",
        H2OTypeConverters.toInt())

    maxLeaves = Param(
        Params._dummy(), "maxLeaves", "max leaves",
        H2OTypeConverters.toInt())

    minSumHessianInLeaf = Param(
        Params._dummy(), "minSumHessianInLeaf", "min sum hessian in leaf",
        H2OTypeConverters.toFloat())

    minDataInLeaf = Param(
        Params._dummy(), "minDataInLeaf", "min data in leaf",
        H2OTypeConverters.toFloat())

    treeMethod = Param(
        Params._dummy(), "treeMethod", "Tree Method",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$TreeMethod"))

    growPolicy = Param(
        Params._dummy(), "growPolicy", "Grow Policy",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$GrowPolicy"))

    booster = Param(
        Params._dummy(), "booster", "Booster",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$Booster"))

    dmatrixType = Param(
        Params._dummy(), "dmatrixType", "DMatrix type",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$DMatrixType"))

    # Description fixed: was "req lambda".
    regLambda = Param(
        Params._dummy(), "regLambda", "reg lambda",
        H2OTypeConverters.toFloat())

    # Description fixed: was "req aplha".
    regAlpha = Param(
        Params._dummy(), "regAlpha", "reg alpha",
        H2OTypeConverters.toFloat())

    sampleType = Param(
        Params._dummy(), "sampleType", "Dart Sample Type",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartSampleType"))

    normalizeType = Param(
        Params._dummy(), "normalizeType", "Dart Normalize Type",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartNormalizeType"))

    rateDrop = Param(
        Params._dummy(), "rateDrop", "rate drop",
        H2OTypeConverters.toFloat())

    # Description fixed: was "onde drop".
    oneDrop = Param(
        Params._dummy(), "oneDrop", "one drop",
        H2OTypeConverters.toBoolean())

    skipDrop = Param(
        Params._dummy(), "skipDrop", "skip drop",
        H2OTypeConverters.toFloat())

    gpuId = Param(
        Params._dummy(), "gpuId", "GPU id",
        H2OTypeConverters.toInt())

    backend = Param(
        Params._dummy(), "backend", "Backend",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$Backend"))

    ##
    # Getters
    ##
    def getQuietMode(self):
        return self.getOrDefault(self.quietMode)

    def getNtrees(self):
        return self.getOrDefault(self.ntrees)

    def getNEstimators(self):
        return self.getOrDefault(self.nEstimators)

    def getMaxDepth(self):
        return self.getOrDefault(self.maxDepth)

    def getMinRows(self):
        return self.getOrDefault(self.minRows)

    def getMinChildWeight(self):
        return self.getOrDefault(self.minChildWeight)

    def getLearnRate(self):
        return self.getOrDefault(self.learnRate)

    def getEta(self):
        return self.getOrDefault(self.eta)

    def getLearnRateAnnealing(self):
        return self.getOrDefault(self.learnRateAnnealing)

    def getSampleRate(self):
        return self.getOrDefault(self.sampleRate)

    def getSubsample(self):
        return self.getOrDefault(self.subsample)

    def getColSampleRate(self):
        return self.getOrDefault(self.colSampleRate)

    def getColSampleByLevel(self):
        return self.getOrDefault(self.colSampleByLevel)

    def getColSampleRatePerTree(self):
        return self.getOrDefault(self.colSampleRatePerTree)

    def getColSampleByTree(self):
        return self.getOrDefault(self.colSampleByTree)

    def getMaxAbsLeafnodePred(self):
        return self.getOrDefault(self.maxAbsLeafnodePred)

    def getMaxDeltaStep(self):
        return self.getOrDefault(self.maxDeltaStep)

    def getScoreTreeInterval(self):
        return self.getOrDefault(self.scoreTreeInterval)

    def getInitialScoreInterval(self):
        return self.getOrDefault(self.initialScoreInterval)

    def getScoreInterval(self):
        return self.getOrDefault(self.scoreInterval)

    def getMinSplitImprovement(self):
        return self.getOrDefault(self.minSplitImprovement)

    def getGamma(self):
        return self.getOrDefault(self.gamma)

    def getNthread(self):
        return self.getOrDefault(self.nthread)

    def getMaxBins(self):
        return self.getOrDefault(self.maxBins)

    def getMaxLeaves(self):
        return self.getOrDefault(self.maxLeaves)

    def getMinSumHessianInLeaf(self):
        return self.getOrDefault(self.minSumHessianInLeaf)

    def getMinDataInLeaf(self):
        return self.getOrDefault(self.minDataInLeaf)

    def getTreeMethod(self):
        return self.getOrDefault(self.treeMethod)

    def getGrowPolicy(self):
        return self.getOrDefault(self.growPolicy)

    def getBooster(self):
        return self.getOrDefault(self.booster)

    def getDmatrixType(self):
        return self.getOrDefault(self.dmatrixType)

    def getRegLambda(self):
        return self.getOrDefault(self.regLambda)

    def getRegAlpha(self):
        return self.getOrDefault(self.regAlpha)

    def getSampleType(self):
        return self.getOrDefault(self.sampleType)

    def getNormalizeType(self):
        return self.getOrDefault(self.normalizeType)

    def getRateDrop(self):
        return self.getOrDefault(self.rateDrop)

    def getOneDrop(self):
        return self.getOrDefault(self.oneDrop)

    def getSkipDrop(self):
        return self.getOrDefault(self.skipDrop)

    def getGpuId(self):
        return self.getOrDefault(self.gpuId)

    def getBackend(self):
        return self.getOrDefault(self.backend)

    ##
    # Setters
    ##
    def setQuietMode(self, value):
        return self._set(quietMode=value)

    def setNtrees(self, value):
        return self._set(ntrees=value)

    def setNEstimators(self, value):
        return self._set(nEstimators=value)

    def setMaxDepth(self, value):
        return self._set(maxDepth=value)

    def setMinRows(self, value):
        return self._set(minRows=value)

    def setMinChildWeight(self, value):
        return self._set(minChildWeight=value)

    def setLearnRate(self, value):
        return self._set(learnRate=value)

    def setEta(self, value):
        return self._set(eta=value)

    def setLearnRateAnnealing(self, value):
        return self._set(learnRateAnnealing=value)

    def setSampleRate(self, value):
        return self._set(sampleRate=value)

    def setSubsample(self, value):
        return self._set(subsample=value)

    def setColSampleRate(self, value):
        return self._set(colSampleRate=value)

    def setColSampleByLevel(self, value):
        return self._set(colSampleByLevel=value)

    def setColSampleRatePerTree(self, value):
        return self._set(colSampleRatePerTree=value)

    def setColSampleByTree(self, value):
        return self._set(colSampleByTree=value)

    def setMaxAbsLeafnodePred(self, value):
        return self._set(maxAbsLeafnodePred=value)

    def setMaxDeltaStep(self, value):
        return self._set(maxDeltaStep=value)

    def setScoreTreeInterval(self, value):
        return self._set(scoreTreeInterval=value)

    def setInitialScoreInterval(self, value):
        return self._set(initialScoreInterval=value)

    def setScoreInterval(self, value):
        return self._set(scoreInterval=value)

    def setMinSplitImprovement(self, value):
        return self._set(minSplitImprovement=value)

    def setGamma(self, value):
        return self._set(gamma=value)

    def setNthread(self, value):
        return self._set(nthread=value)

    def setMaxBins(self, value):
        return self._set(maxBins=value)

    def setMaxLeaves(self, value):
        return self._set(maxLeaves=value)

    def setMinSumHessianInLeaf(self, value):
        return self._set(minSumHessianInLeaf=value)

    def setMinDataInLeaf(self, value):
        return self._set(minDataInLeaf=value)

    def setTreeMethod(self, value):
        return self._set(treeMethod=value)

    def setGrowPolicy(self, value):
        return self._set(growPolicy=value)

    def setBooster(self, value):
        return self._set(booster=value)

    def setDmatrixType(self, value):
        return self._set(dmatrixType=value)

    def setRegLambda(self, value):
        return self._set(regLambda=value)

    def setRegAlpha(self, value):
        return self._set(regAlpha=value)

    def setSampleType(self, value):
        return self._set(sampleType=value)

    def setNormalizeType(self, value):
        return self._set(normalizeType=value)

    def setRateDrop(self, value):
        return self._set(rateDrop=value)

    def setOneDrop(self, value):
        return self._set(oneDrop=value)

    def setSkipDrop(self, value):
        return self._set(skipDrop=value)

    def setGpuId(self, value):
        return self._set(gpuId=value)

    def setBackend(self, value):
        return self._set(backend=value)
Пример #25
0
class H2OSharedTreeParams(H2OAlgoSupervisedParams, H2OTreeBasedSupervisedMOJOParams):
    """Params shared by the tree-based algorithm wrappers.

    Each param declared here has a getter (``getOrDefault`` of the param) and
    a setter (forwarding to ``_set``). ``setNtrees`` is also defined here even
    though no ``ntrees`` param is declared in this class body; it is
    presumably inherited from ``H2OTreeBasedSupervisedMOJOParams``.
    """

    ##
    # Param definitions
    ##
    maxDepth = Param(
        Params._dummy(),
        "maxDepth",
        "Maximum tree depth",
        H2OTypeConverters.toInt())

    minRows = Param(
        Params._dummy(),
        "minRows",
        "Fewest allowed (weighted) observations in a leaf",
        H2OTypeConverters.toFloat())

    nbins = Param(
        Params._dummy(),
        "nbins",
        "For numerical columns (real/int), build a histogram of (at least) this many bins, then split "
        "at the best point",
        H2OTypeConverters.toInt())

    nbinsCats = Param(
        Params._dummy(),
        "nbinsCats",
        "For categorical columns (factors), build a histogram of this many bins, then split at the best "
        "point. Higher values can lead to more overfitting",
        H2OTypeConverters.toInt())

    minSplitImprovement = Param(
        Params._dummy(),
        "minSplitImprovement",
        "Minimum relative improvement in squared error reduction for a split to happen",
        H2OTypeConverters.toFloat())

    histogramType = Param(
        Params._dummy(),
        "histogramType",
        "What type of histogram to use for finding optimal split points",
        H2OTypeConverters.toEnumString("hex.tree.SharedTreeModel$SharedTreeParameters$HistogramType"))

    r2Stopping = Param(
        Params._dummy(),
        "r2Stopping",
        "r2_stopping is no longer supported and will be ignored if set - please use stopping_rounds, "
        "stopping_metric and stopping_tolerance instead. Previous version of H2O would stop making trees "
        "when the R^2 metric equals or exceeds this",
        H2OTypeConverters.toFloat())

    nbinsTopLevel = Param(
        Params._dummy(),
        "nbinsTopLevel",
        "For numerical columns (real/int), build a histogram of (at most) this many bins at the root "
        "level, then decrease by factor of two per level",
        H2OTypeConverters.toInt())

    buildTreeOneNode = Param(
        Params._dummy(),
        "buildTreeOneNode",
        "Run on one node only; no network overhead but fewer cpus used.  Suitable for small datasets.",
        H2OTypeConverters.toBoolean())

    scoreTreeInterval = Param(
        Params._dummy(),
        "scoreTreeInterval",
        "Score the model after every so many trees. Disabled if set to 0.",
        H2OTypeConverters.toInt())

    sampleRate = Param(
        Params._dummy(),
        "sampleRate",
        "Row sample rate per tree (from 0.0 to 1.0)",
        H2OTypeConverters.toFloat())

    sampleRatePerClass = Param(
        Params._dummy(),
        "sampleRatePerClass",
        "A list of row sample rates per class (relative fraction for each class, from 0.0 to 1.0), for each tree",
        H2OTypeConverters.toNullableListFloat())

    colSampleRateChangePerLevel = Param(
        Params._dummy(),
        "colSampleRateChangePerLevel",
        "Relative change of the column sampling rate for every level (from 0.0 to 2.0)",
        H2OTypeConverters.toFloat())

    colSampleRatePerTree = Param(
        Params._dummy(),
        "colSampleRatePerTree",
        "Column sample rate per tree (from 0.0 to 1.0)",
        H2OTypeConverters.toFloat())

    ##
    # Getters: each returns the explicitly set value or the default
    ##
    def getMaxDepth(self):
        return self.getOrDefault(self.maxDepth)

    def getMinRows(self):
        return self.getOrDefault(self.minRows)

    def getNbins(self):
        return self.getOrDefault(self.nbins)

    def getNbinsCats(self):
        return self.getOrDefault(self.nbinsCats)

    def getMinSplitImprovement(self):
        return self.getOrDefault(self.minSplitImprovement)

    def getHistogramType(self):
        return self.getOrDefault(self.histogramType)

    def getR2Stopping(self):
        return self.getOrDefault(self.r2Stopping)

    def getNbinsTopLevel(self):
        return self.getOrDefault(self.nbinsTopLevel)

    def getBuildTreeOneNode(self):
        return self.getOrDefault(self.buildTreeOneNode)

    def getScoreTreeInterval(self):
        return self.getOrDefault(self.scoreTreeInterval)

    def getSampleRate(self):
        return self.getOrDefault(self.sampleRate)

    def getSampleRatePerClass(self):
        return self.getOrDefault(self.sampleRatePerClass)

    def getColSampleRateChangePerLevel(self):
        return self.getOrDefault(self.colSampleRateChangePerLevel)

    def getColSampleRatePerTree(self):
        return self.getOrDefault(self.colSampleRatePerTree)

    ##
    # Setters: each forwards to ``_set`` and returns its result
    ##
    def setNtrees(self, value):
        return self._set(ntrees=value)

    def setMaxDepth(self, value):
        return self._set(maxDepth=value)

    def setMinRows(self, value):
        return self._set(minRows=value)

    def setNbins(self, value):
        return self._set(nbins=value)

    def setNbinsCats(self, value):
        return self._set(nbinsCats=value)

    def setMinSplitImprovement(self, value):
        return self._set(minSplitImprovement=value)

    def setHistogramType(self, value):
        return self._set(histogramType=value)

    def setR2Stopping(self, value):
        return self._set(r2Stopping=value)

    def setNbinsTopLevel(self, value):
        return self._set(nbinsTopLevel=value)

    def setBuildTreeOneNode(self, value):
        return self._set(buildTreeOneNode=value)

    def setScoreTreeInterval(self, value):
        return self._set(scoreTreeInterval=value)

    def setSampleRate(self, value):
        return self._set(sampleRate=value)

    def setSampleRatePerClass(self, value):
        return self._set(sampleRatePerClass=value)

    def setColSampleRateChangePerLevel(self, value):
        return self._set(colSampleRateChangePerLevel=value)

    def setColSampleRatePerTree(self, value):
        return self._set(colSampleRatePerTree=value)
Пример #26
0
class H2OKMeansParams(H2OAlgoUnsupervisedParams):
    """Param declarations and accessors for the KMeans algorithm.

    Each class attribute is a ``Param`` paired with a converter taken from
    ``H2OTypeConverters``. Every ``getX`` method reads its param through
    ``getOrDefault``; every ``setX`` method forwards the value to ``_set``
    and returns whatever that call returns.
    """

    #
    # Param definitions
    #
    maxIterations = Param(
        Params._dummy(), "maxIterations",
        "Maximum number of KMeans iterations to find the centroids.",
        H2OTypeConverters.toInt())

    standardize = Param(
        Params._dummy(), "standardize",
        "Standardize the numeric columns to have a mean of zero and unit variance.",
        H2OTypeConverters.toBoolean())

    init = Param(
        Params._dummy(), "init",
        "Initialization mode for finding the initial cluster centers.",
        H2OTypeConverters.toEnumString("hex.kmeans.KMeans$Initialization"))

    userPoints = Param(
        Params._dummy(), "userPoints",
        "This option enables to specify array of points, where each point represents coordinates of "
        "an initial cluster center. The user-specified points must have the same number of columns "
        "as the training observations. The number of rows must equal the number of clusters.",
        H2OTypeConverters.toNullableListListFloat())

    estimateK = Param(
        Params._dummy(), "estimateK",
        "If enabled, the algorithm tries to identify optimal number of clusters, up to k clusters.",
        H2OTypeConverters.toBoolean())

    k = Param(
        Params._dummy(), "k",
        "Number of clusters to generate.",
        H2OTypeConverters.toInt())

    #
    # Getters: each returns getOrDefault(<param>)
    #
    def getMaxIterations(self):
        param = self.maxIterations
        return self.getOrDefault(param)

    def getStandardize(self):
        param = self.standardize
        return self.getOrDefault(param)

    def getInit(self):
        param = self.init
        return self.getOrDefault(param)

    def getUserPoints(self):
        param = self.userPoints
        return self.getOrDefault(param)

    def getEstimateK(self):
        param = self.estimateK
        return self.getOrDefault(param)

    def getK(self):
        param = self.k
        return self.getOrDefault(param)

    #
    # Setters: each returns the result of _set(<name>=value)
    #
    def setMaxIterations(self, value):
        assignment = {"maxIterations": value}
        return self._set(**assignment)

    def setStandardize(self, value):
        assignment = {"standardize": value}
        return self._set(**assignment)

    def setInit(self, value):
        assignment = {"init": value}
        return self._set(**assignment)

    def setUserPoints(self, value):
        assignment = {"userPoints": value}
        return self._set(**assignment)

    def setEstimateK(self, value):
        assignment = {"estimateK": value}
        return self._set(**assignment)

    def setK(self, value):
        assignment = {"k": value}
        return self._set(**assignment)
Пример #27
0
class H2ODeepLearningParams(H2OAlgoSupervisedParams, HasStoppingCriteria):
    """Param declarations and accessors for the Deep Learning algorithm.

    Each class attribute is a ``Param`` paired with a converter taken from
    ``H2OTypeConverters``. Every ``getX`` method reads its param through
    ``getOrDefault``; every ``setX`` method forwards the value to ``_set``
    and returns whatever that call returns.
    """

    ##
    # Param definitions
    ##
    epochs = Param(
        Params._dummy(),
        "epochs",
        "The number of passes over the training dataset to be carried out",
        H2OTypeConverters.toFloat())

    l1 = Param(
        Params._dummy(),
        "l1",
        "A regularization method that constrains the absolute value of the weights and "
        "has the net effect of dropping some weights (setting them to zero) from a model "
        "to reduce complexity and avoid overfitting.",
        H2OTypeConverters.toFloat())

    l2 = Param(
        Params._dummy(),
        "l2",
        "A regularization method that constrains the sum of the squared weights. "
        "This method introduces bias into parameter estimates, but frequently "
        "produces substantial gains in modeling as estimate variance is reduced.",
        H2OTypeConverters.toFloat())

    hidden = Param(
        Params._dummy(),
        "hidden",
        "The number and size of each hidden layer in the model",
        H2OTypeConverters.toListInt())

    reproducible = Param(
        Params._dummy(),
        "reproducible",
        "Force reproducibility on small data (will be slow - only uses 1 thread)",
        H2OTypeConverters.toBoolean())

    ##
    # Getters: each returns getOrDefault(<param>)
    ##
    def getEpochs(self):
        param = self.epochs
        return self.getOrDefault(param)

    def getL1(self):
        param = self.l1
        return self.getOrDefault(param)

    def getL2(self):
        param = self.l2
        return self.getOrDefault(param)

    def getHidden(self):
        param = self.hidden
        return self.getOrDefault(param)

    def getReproducible(self):
        param = self.reproducible
        return self.getOrDefault(param)

    ##
    # Setters: each returns the result of _set(<name>=value)
    ##
    def setEpochs(self, value):
        assignment = {"epochs": value}
        return self._set(**assignment)

    def setL1(self, value):
        assignment = {"l1": value}
        return self._set(**assignment)

    def setL2(self, value):
        assignment = {"l2": value}
        return self._set(**assignment)

    def setHidden(self, value):
        assignment = {"hidden": value}
        return self._set(**assignment)

    def setReproducible(self, value):
        assignment = {"reproducible": value}
        return self._set(**assignment)
Пример #28
0
class H2OAutoMLParams(H2OCommonSupervisedParams, HasMonotoneConstraints):
    """Param declarations and accessors for AutoML.

    Each class attribute is a ``Param`` paired with a converter taken from
    ``H2OTypeConverters``. Every ``getX`` method returns
    ``self.getOrDefault(self.x)`` and every ``setX`` method returns
    ``self._set(x=value)``.
    """

    ##
    # Param definitions
    ##
    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Ignored column names",
        H2OTypeConverters.toListString())

    # Declared so that getTryMutations/setTryMutations below have a Param to
    # resolve; without this declaration both accessors fail on attribute
    # lookup unless a base class happens to provide the attribute.
    # NOTE(review): confirm the JVM-side stage exposes a matching
    # ``tryMutations`` param before relying on it during fitting.
    tryMutations = Param(
        Params._dummy(),
        "tryMutations",
        "Whether to use mutations as part of the feature engineering",
        H2OTypeConverters.toBoolean())

    includeAlgos = Param(
        Params._dummy(),
        "includeAlgos",
        "Algorithms to include when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo"))

    excludeAlgos = Param(
        Params._dummy(),
        "excludeAlgos",
        "Algorithms to exclude when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo"))

    projectName = Param(
        Params._dummy(),
        "projectName",
        "identifier for models that should be grouped together in the leaderboard "
        "(e.g., airlines and iris)",
        H2OTypeConverters.toNullableString())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum time in seconds for automl to be running",
        H2OTypeConverters.toFloat())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Stopping rounds",
        H2OTypeConverters.toInt())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Stopping tolerance",
        H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Stopping metric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    sortMetric = Param(
        Params._dummy(),
        "sortMetric",
        "Sort metric for the AutoML leaderboard",
        H2OTypeConverters.toEnumString("ai.h2o.sparkling.ml.algos.H2OAutoMLSortMetric"))

    balanceClasses = Param(
        Params._dummy(),
        "balanceClasses",
        "Balance classes",
        H2OTypeConverters.toBoolean())

    classSamplingFactors = Param(
        Params._dummy(),
        "classSamplingFactors",
        "Class sampling factors",
        H2OTypeConverters.toNullableListFloat())

    maxAfterBalanceSize = Param(
        Params._dummy(),
        "maxAfterBalanceSize",
        "Max after balance size",
        H2OTypeConverters.toFloat())

    keepCrossValidationPredictions = Param(
        Params._dummy(),
        "keepCrossValidationPredictions",
        "Keep cross validation predictions",
        H2OTypeConverters.toBoolean())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Keep cross validation models",
        H2OTypeConverters.toBoolean())

    maxModels = Param(
        Params._dummy(),
        "maxModels",
        "Max models to train in AutoML",
        H2OTypeConverters.toInt())

    ##
    # Getters: each returns getOrDefault(<param>)
    ##
    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getTryMutations(self):
        return self.getOrDefault(self.tryMutations)

    def getExcludeAlgos(self):
        return self.getOrDefault(self.excludeAlgos)

    def getIncludeAlgos(self):
        return self.getOrDefault(self.includeAlgos)

    def getProjectName(self):
        return self.getOrDefault(self.projectName)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getSortMetric(self):
        return self.getOrDefault(self.sortMetric)

    def getBalanceClasses(self):
        return self.getOrDefault(self.balanceClasses)

    def getClassSamplingFactors(self):
        return self.getOrDefault(self.classSamplingFactors)

    def getMaxAfterBalanceSize(self):
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getKeepCrossValidationPredictions(self):
        return self.getOrDefault(self.keepCrossValidationPredictions)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getMaxModels(self):
        return self.getOrDefault(self.maxModels)

    ##
    # Setters: each returns the result of _set(<name>=value)
    ##
    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setTryMutations(self, value):
        return self._set(tryMutations=value)

    def setIncludeAlgos(self, value):
        return self._set(includeAlgos=value)

    def setExcludeAlgos(self, value):
        return self._set(excludeAlgos=value)

    def setProjectName(self, value):
        return self._set(projectName=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setSortMetric(self, value):
        return self._set(sortMetric=value)

    def setBalanceClasses(self, value):
        return self._set(balanceClasses=value)

    def setClassSamplingFactors(self, value):
        return self._set(classSamplingFactors=value)

    def setMaxAfterBalanceSize(self, value):
        return self._set(maxAfterBalanceSize=value)

    def setKeepCrossValidationPredictions(self, value):
        return self._set(keepCrossValidationPredictions=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setMaxModels(self, value):
        return self._set(maxModels=value)
Пример #29
0
class H2OXGBoostParams(H2OAlgoSupervisedParams,
                       H2OTreeBasedSupervisedMOJOParams,
                       HasMonotoneConstraints, HasStoppingCriteria):
    """Param declarations and accessors for the XGBoost algorithm.

    Each class attribute is a ``Param`` paired with a converter taken from
    ``H2OTypeConverters``. Every ``getX`` method returns
    ``self.getOrDefault(self.x)`` and every ``setX`` method returns
    ``self._set(x=value)``.

    ``setNtrees`` writes a param named ``ntrees`` that is not declared in
    this class; presumably it is inherited from one of the base classes --
    TODO confirm.
    """

    ##
    # Param definitions
    ##
    quietMode = Param(Params._dummy(), "quietMode", "Quiet mode",
                      H2OTypeConverters.toBoolean())

    maxDepth = Param(Params._dummy(), "maxDepth", "Maximal depth",
                     H2OTypeConverters.toInt())

    minRows = Param(Params._dummy(), "minRows", "Min rows",
                    H2OTypeConverters.toFloat())

    minChildWeight = Param(Params._dummy(), "minChildWeight",
                           "minimal child weight", H2OTypeConverters.toFloat())

    learnRate = Param(Params._dummy(), "learnRate", "learn rate",
                      H2OTypeConverters.toFloat())

    eta = Param(Params._dummy(), "eta", "eta", H2OTypeConverters.toFloat())

    sampleRate = Param(Params._dummy(), "sampleRate", "Sample rate",
                       H2OTypeConverters.toFloat())

    subsample = Param(Params._dummy(), "subsample", "subsample",
                      H2OTypeConverters.toFloat())

    colSampleRate = Param(Params._dummy(), "colSampleRate", "col sample rate",
                          H2OTypeConverters.toFloat())

    colSampleByLevel = Param(Params._dummy(),
                             "colSampleByLevel", "Col Sample By Level",
                             H2OTypeConverters.toFloat())

    # Doc string previously read "col samle rate".
    colSampleRatePerTree = Param(Params._dummy(), "colSampleRatePerTree",
                                 "col sample rate per tree",
                                 H2OTypeConverters.toFloat())

    colSampleByTree = Param(Params._dummy(), "colSampleByTree",
                            "col sample by tree", H2OTypeConverters.toFloat())

    colSampleByNode = Param(Params._dummy(), "colSampleByNode",
                            "col sample by node", H2OTypeConverters.toFloat())

    # Doc string previously read "max abs lead node prediction".
    maxAbsLeafnodePred = Param(Params._dummy(), "maxAbsLeafnodePred",
                               "max abs leaf node prediction",
                               H2OTypeConverters.toFloat())

    maxDeltaStep = Param(Params._dummy(), "maxDeltaStep", "max delta step",
                         H2OTypeConverters.toFloat())

    scoreTreeInterval = Param(Params._dummy(), "scoreTreeInterval",
                              "score tree interval", H2OTypeConverters.toInt())

    minSplitImprovement = Param(Params._dummy(), "minSplitImprovement",
                                "Min split improvement",
                                H2OTypeConverters.toFloat())

    gamma = Param(Params._dummy(), "gamma", "gamma",
                  H2OTypeConverters.toFloat())

    nthread = Param(Params._dummy(), "nthread", "nthread",
                    H2OTypeConverters.toInt())

    # Doc string previously read "nbins", which does not match the param name.
    maxBins = Param(Params._dummy(), "maxBins", "max bins",
                    H2OTypeConverters.toInt())

    maxLeaves = Param(Params._dummy(), "maxLeaves", "max leaves",
                      H2OTypeConverters.toInt())

    minSumHessianInLeaf = Param(Params._dummy(), "minSumHessianInLeaf",
                                "min sum hessian in leaf",
                                H2OTypeConverters.toFloat())

    minDataInLeaf = Param(Params._dummy(), "minDataInLeaf", "min data in leaf",
                          H2OTypeConverters.toFloat())

    treeMethod = Param(
        Params._dummy(), "treeMethod", "Tree Method",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$TreeMethod"))

    growPolicy = Param(
        Params._dummy(), "growPolicy", "Grow Policy",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$GrowPolicy"))

    booster = Param(
        Params._dummy(), "booster", "Booster",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$Booster"))

    dmatrixType = Param(
        Params._dummy(), "dmatrixType", "DMatrix type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DMatrixType"))

    # Doc strings previously read "req lambda" and "req aplha".
    regLambda = Param(Params._dummy(), "regLambda", "reg lambda",
                      H2OTypeConverters.toFloat())

    regAlpha = Param(Params._dummy(), "regAlpha", "reg alpha",
                     H2OTypeConverters.toFloat())

    sampleType = Param(
        Params._dummy(), "sampleType", "Dart Sample Type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartSampleType"))

    normalizeType = Param(
        Params._dummy(), "normalizeType", "Dart Normalize Type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartNormalizeType"
        ))

    rateDrop = Param(Params._dummy(), "rateDrop", "rate drop",
                     H2OTypeConverters.toFloat())

    # Doc string previously read "onde drop".
    oneDrop = Param(Params._dummy(), "oneDrop", "one drop",
                    H2OTypeConverters.toBoolean())

    skipDrop = Param(Params._dummy(), "skipDrop", "skip drop",
                     H2OTypeConverters.toFloat())

    gpuId = Param(Params._dummy(), "gpuId", "GPU id",
                  H2OTypeConverters.toInt())

    backend = Param(
        Params._dummy(), "backend", "Backend",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$Backend"))

    saveMatrixDirectory = Param(
        Params._dummy(), "saveMatrixDirectory",
        "Directory where to save matrices passed to XGBoost library. Useful for debugging.",
        H2OTypeConverters.toNullableString())

    ignoredCols = Param(Params._dummy(), "ignoredCols",
                        "Names of columns to ignore for training.",
                        H2OTypeConverters.toNullableListString())

    buildTreeOneNode = Param(
        Params._dummy(), "buildTreeOneNode",
        "Run on one node only; no network overhead but fewer cpus used. Suitable for small datasets.",
        H2OTypeConverters.toBoolean())

    maxRuntimeSecs = Param(
        Params._dummy(), "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())

    scoreEachIteration = Param(
        Params._dummy(), "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())

    customDistributionFunc = Param(
        Params._dummy(), "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    customMetricFunc = Param(
        Params._dummy(), "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    huberAlpha = Param(
        Params._dummy(), "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).", H2OTypeConverters.toFloat())

    keepCrossValidationModels = Param(
        Params._dummy(), "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())

    calibrateModel = Param(
        Params._dummy(), "calibrateModel",
        "Use Platt Scaling to calculate calibrated class probabilities. Calibration can provide more accurate "
        "estimates of class probabilities.", H2OTypeConverters.toBoolean())

    foldAssignment = Param(
        Params._dummy(), "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$FoldAssignmentScheme"))

    tweediePower = Param(
        Params._dummy(), "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())

    categoricalEncoding = Param(
        Params._dummy(), "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$CategoricalEncodingScheme"))

    maxCategoricalLevels = Param(
        Params._dummy(), "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())

    exportCheckpointsDir = Param(
        Params._dummy(), "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())

    quantileAlpha = Param(
        Params._dummy(), "quantileAlpha",
        "Desired quantile for Quantile regression, must be between 0 and 1.",
        H2OTypeConverters.toFloat())

    ignoreConstCols = Param(Params._dummy(), "ignoreConstCols",
                            "Ignore constant columns.",
                            H2OTypeConverters.toBoolean())

    ##
    # Getters: each returns getOrDefault(<param>)
    ##
    def getQuietMode(self):
        return self.getOrDefault(self.quietMode)

    def getMaxDepth(self):
        return self.getOrDefault(self.maxDepth)

    def getMinRows(self):
        return self.getOrDefault(self.minRows)

    def getMinChildWeight(self):
        return self.getOrDefault(self.minChildWeight)

    def getLearnRate(self):
        return self.getOrDefault(self.learnRate)

    def getEta(self):
        return self.getOrDefault(self.eta)

    def getSampleRate(self):
        return self.getOrDefault(self.sampleRate)

    def getSubsample(self):
        return self.getOrDefault(self.subsample)

    def getColSampleRate(self):
        return self.getOrDefault(self.colSampleRate)

    def getColSampleByLevel(self):
        return self.getOrDefault(self.colSampleByLevel)

    def getColSampleRatePerTree(self):
        return self.getOrDefault(self.colSampleRatePerTree)

    def getColSampleByTree(self):
        return self.getOrDefault(self.colSampleByTree)

    def getColSampleByNode(self):
        return self.getOrDefault(self.colSampleByNode)

    def getMaxAbsLeafnodePred(self):
        return self.getOrDefault(self.maxAbsLeafnodePred)

    def getMaxDeltaStep(self):
        return self.getOrDefault(self.maxDeltaStep)

    def getScoreTreeInterval(self):
        return self.getOrDefault(self.scoreTreeInterval)

    def getMinSplitImprovement(self):
        return self.getOrDefault(self.minSplitImprovement)

    def getGamma(self):
        return self.getOrDefault(self.gamma)

    def getNthread(self):
        return self.getOrDefault(self.nthread)

    def getMaxBins(self):
        return self.getOrDefault(self.maxBins)

    def getMaxLeaves(self):
        return self.getOrDefault(self.maxLeaves)

    def getMinSumHessianInLeaf(self):
        return self.getOrDefault(self.minSumHessianInLeaf)

    def getMinDataInLeaf(self):
        return self.getOrDefault(self.minDataInLeaf)

    def getTreeMethod(self):
        return self.getOrDefault(self.treeMethod)

    def getGrowPolicy(self):
        return self.getOrDefault(self.growPolicy)

    def getBooster(self):
        return self.getOrDefault(self.booster)

    def getDmatrixType(self):
        return self.getOrDefault(self.dmatrixType)

    def getRegLambda(self):
        return self.getOrDefault(self.regLambda)

    def getRegAlpha(self):
        return self.getOrDefault(self.regAlpha)

    def getSampleType(self):
        return self.getOrDefault(self.sampleType)

    def getNormalizeType(self):
        return self.getOrDefault(self.normalizeType)

    def getRateDrop(self):
        return self.getOrDefault(self.rateDrop)

    def getOneDrop(self):
        return self.getOrDefault(self.oneDrop)

    def getSkipDrop(self):
        return self.getOrDefault(self.skipDrop)

    def getGpuId(self):
        return self.getOrDefault(self.gpuId)

    def getBackend(self):
        return self.getOrDefault(self.backend)

    def getSaveMatrixDirectory(self):
        return self.getOrDefault(self.saveMatrixDirectory)

    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        return self.getOrDefault(self.ignoreConstCols)

    def getBuildTreeOneNode(self):
        return self.getOrDefault(self.buildTreeOneNode)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getScoreEachIteration(self):
        return self.getOrDefault(self.scoreEachIteration)

    def getCustomDistributionFunc(self):
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        return self.getOrDefault(self.customMetricFunc)

    def getHuberAlpha(self):
        return self.getOrDefault(self.huberAlpha)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getCalibrateModel(self):
        return self.getOrDefault(self.calibrateModel)

    def getFoldAssignment(self):
        return self.getOrDefault(self.foldAssignment)

    def getTweediePower(self):
        return self.getOrDefault(self.tweediePower)

    def getCategoricalEncoding(self):
        return self.getOrDefault(self.categoricalEncoding)

    def getMaxCategoricalLevels(self):
        return self.getOrDefault(self.maxCategoricalLevels)

    def getExportCheckpointsDir(self):
        return self.getOrDefault(self.exportCheckpointsDir)

    def getQuantileAlpha(self):
        return self.getOrDefault(self.quantileAlpha)

    ##
    # Setters: each returns the result of _set(<name>=value)
    ##
    def setQuietMode(self, value):
        return self._set(quietMode=value)

    def setNtrees(self, value):
        # ``ntrees`` is not declared in this class; presumably inherited -- TODO confirm.
        return self._set(ntrees=value)

    def setMaxDepth(self, value):
        return self._set(maxDepth=value)

    def setMinRows(self, value):
        return self._set(minRows=value)

    def setMinChildWeight(self, value):
        return self._set(minChildWeight=value)

    def setLearnRate(self, value):
        return self._set(learnRate=value)

    def setEta(self, value):
        return self._set(eta=value)

    def setSampleRate(self, value):
        return self._set(sampleRate=value)

    def setSubsample(self, value):
        return self._set(subsample=value)

    def setColSampleRate(self, value):
        return self._set(colSampleRate=value)

    def setColSampleByLevel(self, value):
        return self._set(colSampleByLevel=value)

    def setColSampleRatePerTree(self, value):
        return self._set(colSampleRatePerTree=value)

    def setColSampleByTree(self, value):
        return self._set(colSampleByTree=value)

    def setColSampleByNode(self, value):
        return self._set(colSampleByNode=value)

    def setMaxAbsLeafnodePred(self, value):
        return self._set(maxAbsLeafnodePred=value)

    def setMaxDeltaStep(self, value):
        return self._set(maxDeltaStep=value)

    def setScoreTreeInterval(self, value):
        return self._set(scoreTreeInterval=value)

    def setMinSplitImprovement(self, value):
        return self._set(minSplitImprovement=value)

    def setGamma(self, value):
        return self._set(gamma=value)

    def setNthread(self, value):
        return self._set(nthread=value)

    def setMaxBins(self, value):
        return self._set(maxBins=value)

    def setMaxLeaves(self, value):
        return self._set(maxLeaves=value)

    def setMinSumHessianInLeaf(self, value):
        return self._set(minSumHessianInLeaf=value)

    def setMinDataInLeaf(self, value):
        return self._set(minDataInLeaf=value)

    def setTreeMethod(self, value):
        return self._set(treeMethod=value)

    def setGrowPolicy(self, value):
        return self._set(growPolicy=value)

    def setBooster(self, value):
        return self._set(booster=value)

    def setDmatrixType(self, value):
        return self._set(dmatrixType=value)

    def setRegLambda(self, value):
        return self._set(regLambda=value)

    def setRegAlpha(self, value):
        return self._set(regAlpha=value)

    def setSampleType(self, value):
        return self._set(sampleType=value)

    def setNormalizeType(self, value):
        return self._set(normalizeType=value)

    def setRateDrop(self, value):
        return self._set(rateDrop=value)

    def setOneDrop(self, value):
        return self._set(oneDrop=value)

    def setSkipDrop(self, value):
        return self._set(skipDrop=value)

    def setGpuId(self, value):
        return self._set(gpuId=value)

    def setBackend(self, value):
        return self._set(backend=value)

    def setSaveMatrixDirectory(self, value):
        return self._set(saveMatrixDirectory=value)

    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        return self._set(ignoreConstCols=value)

    def setBuildTreeOneNode(self, value):
        return self._set(buildTreeOneNode=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setScoreEachIteration(self, value):
        return self._set(scoreEachIteration=value)

    def setCustomDistributionFunc(self, value):
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        return self._set(customMetricFunc=value)

    def setHuberAlpha(self, value):
        return self._set(huberAlpha=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setCalibrateModel(self, value):
        return self._set(calibrateModel=value)

    def setFoldAssignment(self, value):
        return self._set(foldAssignment=value)

    def setTweediePower(self, value):
        return self._set(tweediePower=value)

    def setCategoricalEncoding(self, value):
        return self._set(categoricalEncoding=value)

    def setExportCheckpointsDir(self, value):
        return self._set(exportCheckpointsDir=value)

    def setQuantileAlpha(self, value):
        return self._set(quantileAlpha=value)

    def setMaxCategoricalLevels(self, value):
        return self._set(maxCategoricalLevels=value)
Пример #30
0
 def getCurrentMetrics(self):
     """Fetch the current metrics from the wrapped Java object and convert them.

     The value returned by ``self._java_obj.getCurrentMetrics()`` is passed
     through ``H2OTypeConverters.scalaMapStringStringToDictStringAny`` and
     the converted result is returned.
     """
     javaMetrics = self._java_obj.getCurrentMetrics()
     return H2OTypeConverters.scalaMapStringStringToDictStringAny(javaMetrics)