class H2OSupervisedMOJOParams(H2OMOJOAlgoSharedParams):
    """Parameter mixin adding ``offsetCol`` on top of ``H2OMOJOAlgoSharedParams``."""

    # Name of the offset column; converted with the nullable-string converter.
    offsetCol = Param(
        Params._dummy(),
        "offsetCol",
        "Offset column name",
        H2OTypeConverters.toNullableString())

    def getOffsetCol(self):
        """Return the ``offsetCol`` value as resolved by ``getOrDefault``."""
        offset_column = self.getOrDefault(self.offsetCol)
        return offset_column
# Example #2
# 0
class H2OXGBoostParams(H2OAlgoSupervisedParams,
                       H2OTreeBasedSupervisedMOJOParams,
                       HasMonotoneConstraints, HasStoppingCriteria):
    """Parameter definitions, getters and setters for the XGBoost algorithm.

    Each ``Param`` below is declared with a name, a short description and a
    type converter from ``H2OTypeConverters``. Every parameter declared here
    has a ``get<Name>`` method reading it through ``getOrDefault`` and a
    ``set<Name>`` method storing it through ``_set`` and returning the result
    of ``_set``.
    """

    ##
    # Param definitions
    ##
    quietMode = Param(Params._dummy(), "quietMode", "Quiet mode",
                      H2OTypeConverters.toBoolean())

    maxDepth = Param(Params._dummy(), "maxDepth", "Maximal depth",
                     H2OTypeConverters.toInt())

    minRows = Param(Params._dummy(), "minRows", "Min rows",
                    H2OTypeConverters.toFloat())

    minChildWeight = Param(Params._dummy(), "minChildWeight",
                           "minimal child weight", H2OTypeConverters.toFloat())

    learnRate = Param(Params._dummy(), "learnRate", "learn rate",
                      H2OTypeConverters.toFloat())

    eta = Param(Params._dummy(), "eta", "eta", H2OTypeConverters.toFloat())

    sampleRate = Param(Params._dummy(), "sampleRate", "Sample rate",
                       H2OTypeConverters.toFloat())

    subsample = Param(Params._dummy(), "subsample", "subsample",
                      H2OTypeConverters.toFloat())

    colSampleRate = Param(Params._dummy(), "colSampleRate", "col sample rate",
                          H2OTypeConverters.toFloat())

    colSampleByLevel = Param(Params._dummy(),
                             "colSampleByLevel", "Col Sample By Level",
                             H2OTypeConverters.toFloat())

    colSampleRatePerTree = Param(Params._dummy(), "colSampleRatePerTree",
                                 "col sample rate per tree",
                                 H2OTypeConverters.toFloat())

    colSampleByTree = Param(Params._dummy(), "colSampleByTree",
                            "col sample by tree", H2OTypeConverters.toFloat())

    colSampleByNode = Param(Params._dummy(), "colSampleByNode",
                            "col sample by node", H2OTypeConverters.toFloat())

    maxAbsLeafnodePred = Param(Params._dummy(), "maxAbsLeafnodePred",
                               "max abs leaf node prediction",
                               H2OTypeConverters.toFloat())

    maxDeltaStep = Param(Params._dummy(), "maxDeltaStep", "max delta step",
                         H2OTypeConverters.toFloat())

    scoreTreeInterval = Param(Params._dummy(), "scoreTreeInterval",
                              "score tree interval", H2OTypeConverters.toInt())

    minSplitImprovement = Param(Params._dummy(), "minSplitImprovement",
                                "Min split improvement",
                                H2OTypeConverters.toFloat())

    gamma = Param(Params._dummy(), "gamma", "gamma",
                  H2OTypeConverters.toFloat())

    nthread = Param(Params._dummy(), "nthread", "nthread",
                    H2OTypeConverters.toInt())

    maxBins = Param(Params._dummy(), "maxBins", "nbins",
                    H2OTypeConverters.toInt())

    maxLeaves = Param(Params._dummy(), "maxLeaves", "max leaves",
                      H2OTypeConverters.toInt())

    minSumHessianInLeaf = Param(Params._dummy(), "minSumHessianInLeaf",
                                "min sum hessian in leaf",
                                H2OTypeConverters.toFloat())

    minDataInLeaf = Param(Params._dummy(), "minDataInLeaf", "min data in leaf",
                          H2OTypeConverters.toFloat())

    treeMethod = Param(
        Params._dummy(), "treeMethod", "Tree Method",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$TreeMethod"))

    growPolicy = Param(
        Params._dummy(), "growPolicy", "Grow Policy",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$GrowPolicy"))

    booster = Param(
        Params._dummy(), "booster", "Booster",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$Booster"))

    dmatrixType = Param(
        Params._dummy(), "dmatrixType", "DMatrix type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DMatrixType"))

    regLambda = Param(Params._dummy(), "regLambda", "reg lambda",
                      H2OTypeConverters.toFloat())

    regAlpha = Param(Params._dummy(), "regAlpha", "reg alpha",
                     H2OTypeConverters.toFloat())

    sampleType = Param(
        Params._dummy(), "sampleType", "Dart Sample Type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartSampleType"))

    normalizeType = Param(
        Params._dummy(), "normalizeType", "Dart Normalize Type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartNormalizeType"
        ))

    rateDrop = Param(Params._dummy(), "rateDrop", "rate drop",
                     H2OTypeConverters.toFloat())

    oneDrop = Param(Params._dummy(), "oneDrop", "one drop",
                    H2OTypeConverters.toBoolean())

    skipDrop = Param(Params._dummy(), "skipDrop", "skip drop",
                     H2OTypeConverters.toFloat())

    gpuId = Param(Params._dummy(), "gpuId", "GPU id",
                  H2OTypeConverters.toInt())

    backend = Param(
        Params._dummy(), "backend", "Backend",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$Backend"))

    saveMatrixDirectory = Param(
        Params._dummy(), "saveMatrixDirectory",
        "Directory where to save matrices passed to XGBoost library. Useful for debugging.",
        H2OTypeConverters.toNullableString())

    ignoredCols = Param(Params._dummy(), "ignoredCols",
                        "Names of columns to ignore for training.",
                        H2OTypeConverters.toNullableListString())

    buildTreeOneNode = Param(
        Params._dummy(), "buildTreeOneNode",
        "Run on one node only; no network overhead but fewer cpus used. Suitable for small datasets.",
        H2OTypeConverters.toBoolean())

    maxRuntimeSecs = Param(
        Params._dummy(), "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())

    scoreEachIteration = Param(
        Params._dummy(), "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())

    customDistributionFunc = Param(
        Params._dummy(), "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    customMetricFunc = Param(
        Params._dummy(), "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    huberAlpha = Param(
        Params._dummy(), "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).", H2OTypeConverters.toFloat())

    keepCrossValidationModels = Param(
        Params._dummy(), "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())

    calibrateModel = Param(
        Params._dummy(), "calibrateModel",
        "Use Platt Scaling to calculate calibrated class probabilities. Calibration can provide more accurate "
        "estimates of class probabilities.", H2OTypeConverters.toBoolean())

    foldAssignment = Param(
        Params._dummy(), "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$FoldAssignmentScheme"))

    tweediePower = Param(
        Params._dummy(), "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())

    categoricalEncoding = Param(
        Params._dummy(), "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$CategoricalEncodingScheme"))

    maxCategoricalLevels = Param(
        Params._dummy(), "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())

    exportCheckpointsDir = Param(
        Params._dummy(), "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())

    quantileAlpha = Param(
        Params._dummy(), "quantileAlpha",
        "Desired quantile for Quantile regression, must be between 0 and 1.",
        H2OTypeConverters.toFloat())

    ignoreConstCols = Param(Params._dummy(), "ignoreConstCols",
                            "Ignore constant columns.",
                            H2OTypeConverters.toBoolean())

    ##
    # Getters
    #
    # Each getter returns the value of the same-named Param via getOrDefault.
    ##
    def getQuietMode(self):
        return self.getOrDefault(self.quietMode)

    def getMaxDepth(self):
        return self.getOrDefault(self.maxDepth)

    def getMinRows(self):
        return self.getOrDefault(self.minRows)

    def getMinChildWeight(self):
        return self.getOrDefault(self.minChildWeight)

    def getLearnRate(self):
        return self.getOrDefault(self.learnRate)

    def getEta(self):
        return self.getOrDefault(self.eta)

    def getSampleRate(self):
        return self.getOrDefault(self.sampleRate)

    def getSubsample(self):
        return self.getOrDefault(self.subsample)

    def getColSampleRate(self):
        return self.getOrDefault(self.colSampleRate)

    def getColSampleByLevel(self):
        return self.getOrDefault(self.colSampleByLevel)

    def getColSampleRatePerTree(self):
        return self.getOrDefault(self.colSampleRatePerTree)

    def getColSampleByTree(self):
        return self.getOrDefault(self.colSampleByTree)

    def getColSampleByNode(self):
        return self.getOrDefault(self.colSampleByNode)

    def getMaxAbsLeafnodePred(self):
        return self.getOrDefault(self.maxAbsLeafnodePred)

    def getMaxDeltaStep(self):
        return self.getOrDefault(self.maxDeltaStep)

    def getScoreTreeInterval(self):
        return self.getOrDefault(self.scoreTreeInterval)

    def getMinSplitImprovement(self):
        return self.getOrDefault(self.minSplitImprovement)

    def getGamma(self):
        return self.getOrDefault(self.gamma)

    def getNthread(self):
        return self.getOrDefault(self.nthread)

    def getMaxBins(self):
        return self.getOrDefault(self.maxBins)

    def getMaxLeaves(self):
        return self.getOrDefault(self.maxLeaves)

    def getMinSumHessianInLeaf(self):
        return self.getOrDefault(self.minSumHessianInLeaf)

    def getMinDataInLeaf(self):
        return self.getOrDefault(self.minDataInLeaf)

    def getTreeMethod(self):
        return self.getOrDefault(self.treeMethod)

    def getGrowPolicy(self):
        return self.getOrDefault(self.growPolicy)

    def getBooster(self):
        return self.getOrDefault(self.booster)

    def getDmatrixType(self):
        return self.getOrDefault(self.dmatrixType)

    def getRegLambda(self):
        return self.getOrDefault(self.regLambda)

    def getRegAlpha(self):
        return self.getOrDefault(self.regAlpha)

    def getSampleType(self):
        return self.getOrDefault(self.sampleType)

    def getNormalizeType(self):
        return self.getOrDefault(self.normalizeType)

    def getRateDrop(self):
        return self.getOrDefault(self.rateDrop)

    def getOneDrop(self):
        return self.getOrDefault(self.oneDrop)

    def getSkipDrop(self):
        return self.getOrDefault(self.skipDrop)

    def getGpuId(self):
        return self.getOrDefault(self.gpuId)

    def getBackend(self):
        return self.getOrDefault(self.backend)

    def getSaveMatrixDirectory(self):
        return self.getOrDefault(self.saveMatrixDirectory)

    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        return self.getOrDefault(self.ignoreConstCols)

    def getBuildTreeOneNode(self):
        return self.getOrDefault(self.buildTreeOneNode)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getScoreEachIteration(self):
        return self.getOrDefault(self.scoreEachIteration)

    def getCustomDistributionFunc(self):
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        return self.getOrDefault(self.customMetricFunc)

    def getHuberAlpha(self):
        return self.getOrDefault(self.huberAlpha)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getCalibrateModel(self):
        return self.getOrDefault(self.calibrateModel)

    def getFoldAssignment(self):
        return self.getOrDefault(self.foldAssignment)

    def getTweediePower(self):
        return self.getOrDefault(self.tweediePower)

    def getCategoricalEncoding(self):
        return self.getOrDefault(self.categoricalEncoding)

    def getMaxCategoricalLevels(self):
        return self.getOrDefault(self.maxCategoricalLevels)

    def getExportCheckpointsDir(self):
        return self.getOrDefault(self.exportCheckpointsDir)

    def getQuantileAlpha(self):
        return self.getOrDefault(self.quantileAlpha)

    ##
    # Setters
    #
    # Each setter stores the value under the same-named Param via _set and
    # returns whatever _set returns.
    ##
    def setQuietMode(self, value):
        return self._set(quietMode=value)

    # NOTE(review): ``ntrees`` is not declared in this class; presumably it is
    # inherited from one of the base classes - verify.
    def setNtrees(self, value):
        return self._set(ntrees=value)

    def setMaxDepth(self, value):
        return self._set(maxDepth=value)

    def setMinRows(self, value):
        return self._set(minRows=value)

    def setMinChildWeight(self, value):
        return self._set(minChildWeight=value)

    def setLearnRate(self, value):
        return self._set(learnRate=value)

    def setEta(self, value):
        return self._set(eta=value)

    def setSampleRate(self, value):
        return self._set(sampleRate=value)

    def setSubsample(self, value):
        return self._set(subsample=value)

    def setColSampleRate(self, value):
        return self._set(colSampleRate=value)

    def setColSampleByLevel(self, value):
        return self._set(colSampleByLevel=value)

    def setColSampleRatePerTree(self, value):
        return self._set(colSampleRatePerTree=value)

    def setColSampleByTree(self, value):
        return self._set(colSampleByTree=value)

    def setColSampleByNode(self, value):
        return self._set(colSampleByNode=value)

    def setMaxAbsLeafnodePred(self, value):
        return self._set(maxAbsLeafnodePred=value)

    def setMaxDeltaStep(self, value):
        return self._set(maxDeltaStep=value)

    def setScoreTreeInterval(self, value):
        return self._set(scoreTreeInterval=value)

    def setMinSplitImprovement(self, value):
        return self._set(minSplitImprovement=value)

    def setGamma(self, value):
        return self._set(gamma=value)

    def setNthread(self, value):
        return self._set(nthread=value)

    def setMaxBins(self, value):
        return self._set(maxBins=value)

    def setMaxLeaves(self, value):
        return self._set(maxLeaves=value)

    def setMinSumHessianInLeaf(self, value):
        return self._set(minSumHessianInLeaf=value)

    def setMinDataInLeaf(self, value):
        return self._set(minDataInLeaf=value)

    def setTreeMethod(self, value):
        return self._set(treeMethod=value)

    def setGrowPolicy(self, value):
        return self._set(growPolicy=value)

    def setBooster(self, value):
        return self._set(booster=value)

    def setDmatrixType(self, value):
        return self._set(dmatrixType=value)

    def setRegLambda(self, value):
        return self._set(regLambda=value)

    def setRegAlpha(self, value):
        return self._set(regAlpha=value)

    def setSampleType(self, value):
        return self._set(sampleType=value)

    def setNormalizeType(self, value):
        return self._set(normalizeType=value)

    def setRateDrop(self, value):
        return self._set(rateDrop=value)

    def setOneDrop(self, value):
        return self._set(oneDrop=value)

    def setSkipDrop(self, value):
        return self._set(skipDrop=value)

    def setGpuId(self, value):
        return self._set(gpuId=value)

    def setBackend(self, value):
        return self._set(backend=value)

    def setSaveMatrixDirectory(self, value):
        return self._set(saveMatrixDirectory=value)

    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        return self._set(ignoreConstCols=value)

    def setBuildTreeOneNode(self, value):
        return self._set(buildTreeOneNode=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setScoreEachIteration(self, value):
        return self._set(scoreEachIteration=value)

    def setCustomDistributionFunc(self, value):
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        return self._set(customMetricFunc=value)

    def setHuberAlpha(self, value):
        return self._set(huberAlpha=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setCalibrateModel(self, value):
        return self._set(calibrateModel=value)

    def setFoldAssignment(self, value):
        return self._set(foldAssignment=value)

    def setTweediePower(self, value):
        return self._set(tweediePower=value)

    def setCategoricalEncoding(self, value):
        return self._set(categoricalEncoding=value)

    def setExportCheckpointsDir(self, value):
        return self._set(exportCheckpointsDir=value)

    def setQuantileAlpha(self, value):
        return self._set(quantileAlpha=value)

    def setMaxCategoricalLevels(self, value):
        return self._set(maxCategoricalLevels=value)
# Example #3
# 0
class H2OAutoMLParams(H2OCommonSupervisedParams, HasMonotoneConstraints):
    """Parameter definitions, getters and setters for AutoML.

    Getters read the same-named Param through ``getOrDefault``; setters store
    the value through ``_set`` and return its result.
    """

    ##
    # Param definitions
    ##
    ignoredCols = Param(Params._dummy(), "ignoredCols", "Ignored column names",
                        H2OTypeConverters.toListString())

    includeAlgos = Param(
        Params._dummy(), "includeAlgos",
        "Algorithms to include when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo"))

    excludeAlgos = Param(
        Params._dummy(), "excludeAlgos",
        "Algorithms to exclude when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo"))

    projectName = Param(
        Params._dummy(), "projectName",
        "identifier for models that should be grouped together in the leaderboard "
        "(e.g., airlines and iris)",
        H2OTypeConverters.toNullableString())

    maxRuntimeSecs = Param(
        Params._dummy(), "maxRuntimeSecs",
        "Maximum time in seconds for automl to be running",
        H2OTypeConverters.toFloat())

    stoppingRounds = Param(Params._dummy(), "stoppingRounds",
                           "Stopping rounds", H2OTypeConverters.toInt())

    stoppingTolerance = Param(Params._dummy(), "stoppingTolerance",
                              "Stopping tolerance",
                              H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(), "stoppingMetric", "Stopping metric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    sortMetric = Param(
        Params._dummy(), "sortMetric",
        "Sort metric for the AutoML leaderboard",
        H2OTypeConverters.toEnumString(
            "ai.h2o.sparkling.ml.algos.H2OAutoMLSortMetric"))

    balanceClasses = Param(Params._dummy(), "balanceClasses",
                           "Balance classes", H2OTypeConverters.toBoolean())

    classSamplingFactors = Param(Params._dummy(), "classSamplingFactors",
                                 "Class sampling factors",
                                 H2OTypeConverters.toNullableListFloat())

    maxAfterBalanceSize = Param(Params._dummy(), "maxAfterBalanceSize",
                                "Max after balance size",
                                H2OTypeConverters.toFloat())

    keepCrossValidationPredictions = Param(
        Params._dummy(), "keepCrossValidationPredictions",
        "Keep cross validation predictions",
        H2OTypeConverters.toBoolean())

    keepCrossValidationModels = Param(
        Params._dummy(), "keepCrossValidationModels",
        "Keep cross validation models",
        H2OTypeConverters.toBoolean())

    maxModels = Param(Params._dummy(), "maxModels",
                      "Max models to train in AutoML",
                      H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    # NOTE(review): ``tryMutations`` is not declared among the Params of this
    # class; unless a base class provides it, this accessor cannot resolve the
    # attribute - verify.
    def getTryMutations(self):
        return self.getOrDefault(self.tryMutations)

    def getExcludeAlgos(self):
        return self.getOrDefault(self.excludeAlgos)

    def getIncludeAlgos(self):
        return self.getOrDefault(self.includeAlgos)

    def getProjectName(self):
        return self.getOrDefault(self.projectName)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getSortMetric(self):
        return self.getOrDefault(self.sortMetric)

    def getBalanceClasses(self):
        return self.getOrDefault(self.balanceClasses)

    def getClassSamplingFactors(self):
        return self.getOrDefault(self.classSamplingFactors)

    def getMaxAfterBalanceSize(self):
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getKeepCrossValidationPredictions(self):
        return self.getOrDefault(self.keepCrossValidationPredictions)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getMaxModels(self):
        return self.getOrDefault(self.maxModels)

    ##
    # Setters
    ##
    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    # NOTE(review): see the matching getter - ``tryMutations`` has no Param
    # declaration in this class; verify that a base class defines it.
    def setTryMutations(self, value):
        return self._set(tryMutations=value)

    def setIncludeAlgos(self, value):
        return self._set(includeAlgos=value)

    def setExcludeAlgos(self, value):
        return self._set(excludeAlgos=value)

    def setProjectName(self, value):
        return self._set(projectName=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setSortMetric(self, value):
        return self._set(sortMetric=value)

    def setBalanceClasses(self, value):
        return self._set(balanceClasses=value)

    def setClassSamplingFactors(self, value):
        return self._set(classSamplingFactors=value)

    def setMaxAfterBalanceSize(self, value):
        return self._set(maxAfterBalanceSize=value)

    def setKeepCrossValidationPredictions(self, value):
        return self._set(keepCrossValidationPredictions=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setMaxModels(self, value):
        return self._set(maxModels=value)
# Example #4
# 0
class H2OCommonParams(H2OMOJOAlgoSharedParams):
    """Parameters shared by the algorithm classes building on this mixin.

    Declares the Params below together with ``get<Name>`` accessors (reading
    through ``getOrDefault``) and ``set<Name>`` mutators (storing through
    ``_set`` and returning its result). It also provides setters for several
    parameters that, per the comment in the setter section, are defined on
    the MOJO side.
    """

    foldCol = Param(Params._dummy(), "foldCol", "Fold column name",
                    H2OTypeConverters.toNullableString())

    weightCol = Param(Params._dummy(), "weightCol", "Weight column name",
                      H2OTypeConverters.toNullableString())

    splitRatio = Param(
        Params._dummy(), "splitRatio",
        "Accepts values in range [0, 1.0] which determine how large part of dataset is used for training"
        " and for validation. For example, 0.8 -> 80% training 20% validation.",
        H2OTypeConverters.toFloat())

    seed = Param(Params._dummy(), "seed",
                 "Used to specify seed to reproduce the model run",
                 H2OTypeConverters.toInt())

    nfolds = Param(Params._dummy(), "nfolds", "Number of fold columns",
                   H2OTypeConverters.toInt())

    allStringColumnsToCategorical = Param(
        Params._dummy(), "allStringColumnsToCategorical",
        "Transform all strings columns to categorical",
        H2OTypeConverters.toBoolean())

    columnsToCategorical = Param(
        Params._dummy(), "columnsToCategorical",
        "List of columns to convert to categorical before modelling",
        H2OTypeConverters.toListString())

    ##
    # Getters
    ##
    def getFoldCol(self):
        return self.getOrDefault(self.foldCol)

    def getWeightCol(self):
        return self.getOrDefault(self.weightCol)

    def getSplitRatio(self):
        return self.getOrDefault(self.splitRatio)

    def getSeed(self):
        return self.getOrDefault(self.seed)

    def getNfolds(self):
        return self.getOrDefault(self.nfolds)

    def getAllStringColumnsToCategorical(self):
        return self.getOrDefault(self.allStringColumnsToCategorical)

    def getColumnsToCategorical(self):
        return self.getOrDefault(self.columnsToCategorical)

    ##
    # Setters
    ##
    def setFoldCol(self, value):
        return self._set(foldCol=value)

    def setWeightCol(self, value):
        return self._set(weightCol=value)

    def setSplitRatio(self, value):
        return self._set(splitRatio=value)

    def setSeed(self, value):
        return self._set(seed=value)

    def setNfolds(self, value):
        return self._set(nfolds=value)

    def setAllStringColumnsToCategorical(self, value):
        return self._set(allStringColumnsToCategorical=value)

    def setColumnsToCategorical(self, value, *args):
        """Set the columns to convert to categorical.

        :param value: a single column name or a list of column names
            (checked with ``assert_is_type(value, [str], str)``).
        :param args: additional column names appended after ``value``.
        :return: the result of ``_set``.
        """
        assert_is_type(value, [str], str)

        # Always build a fresh list: a single name is wrapped, and a list is
        # copied so that the caller's list is never mutated.
        if isinstance(value, str):
            columns = [value]
        else:
            columns = list(value)
        columns.extend(args)

        # Store the assembled list (not the raw ``value``), so the extra names
        # are kept even when ``value`` was a single string.
        return self._set(columnsToCategorical=columns)

    # Setters for parameters which are defined on MOJO as well
    def setPredictionCol(self, value):
        return self._set(predictionCol=value)

    def setDetailedPredictionCol(self, value):
        return self._set(detailedPredictionCol=value)

    def setWithDetailedPredictionCol(self, value):
        return self._set(withDetailedPredictionCol=value)

    def setFeaturesCols(self, value):
        return self._set(featuresCols=value)

    def setConvertUnknownCategoricalLevelsToNa(self, value):
        return self._set(convertUnknownCategoricalLevelsToNa=value)

    def setConvertInvalidNumbersToNa(self, value):
        return self._set(convertInvalidNumbersToNa=value)

    def setNamedMojoOutputColumns(self, value):
        return self._set(namedMojoOutputColumns=value)
# Example #5
# 0
class H2OKMeansParams(H2OAlgoUnsupervisedParams):
    """Param declarations for KMeans together with their getters and setters.

    Every getter reads its param through ``getOrDefault``; every setter
    forwards the value to ``_set`` and returns that call's result.
    """

    #
    # Param definitions
    #
    maxIterations = Param(
        Params._dummy(),
        "maxIterations",
        "Maximum number of KMeans iterations to find the centroids.",
        H2OTypeConverters.toInt())

    standardize = Param(
        Params._dummy(),
        "standardize",
        "Standardize the numeric columns to have a mean of zero and unit variance.",
        H2OTypeConverters.toBoolean())

    init = Param(
        Params._dummy(),
        "init",
        "Initialization mode for finding the initial cluster centers.",
        H2OTypeConverters.toEnumString("hex.kmeans.KMeans$Initialization"))

    userPoints = Param(
        Params._dummy(),
        "userPoints",
        "This option enables to specify array of points, where each point represents coordinates of "
        "an initial cluster center. The user-specified points must have the same number of columns "
        "as the training observations. The number of rows must equal the number of clusters.",
        H2OTypeConverters.toNullableListListFloat())

    estimateK = Param(
        Params._dummy(),
        "estimateK",
        "If enabled, the algorithm tries to identify optimal number of clusters, up to k clusters.",
        H2OTypeConverters.toBoolean())

    k = Param(
        Params._dummy(),
        "k",
        "Number of clusters to generate.",
        H2OTypeConverters.toInt())

    quantileAlpha = Param(
        Params._dummy(),
        "quantileAlpha",
        "Desired quantile for Quantile regression, must be between 0 and 1.",
        H2OTypeConverters.toFloat())

    tweediePower = Param(
        Params._dummy(),
        "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())

    maxCategoricalLevels = Param(
        Params._dummy(),
        "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())

    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Names of columns to ignore for training.",
        H2OTypeConverters.toNullableListString())

    ignoreConstCols = Param(
        Params._dummy(),
        "ignoreConstCols",
        "Ignore constant columns.",
        H2OTypeConverters.toBoolean())

    scoreEachIteration = Param(
        Params._dummy(),
        "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())

    customDistributionFunc = Param(
        Params._dummy(),
        "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    customMetricFunc = Param(
        Params._dummy(),
        "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of"
        " the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable)",
        H2OTypeConverters.toInt())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())

    clusterSizeConstraints = Param(
        Params._dummy(),
        "clusterSizeConstraints",
        "An array specifying the minimum number of points that should be in each cluster. The length of the constraints"
        " array has to be the same as the number of clusters.",
        H2OTypeConverters.toNullableListFloat())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Relative tolerance for metric-based stopping criterion (stop if relative improvement is not"
        " at least this much)",
        H2OTypeConverters.toFloat())

    foldAssignment = Param(
        Params._dummy(),
        "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString("hex.Model$Parameters$FoldAssignmentScheme"))

    categoricalEncoding = Param(
        Params._dummy(),
        "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString("hex.Model$Parameters$CategoricalEncodingScheme"))

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and"
        " anonomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used"
        " in GBM and DRF with the Python client.",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    huberAlpha = Param(
        Params._dummy(),
        "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).",
        H2OTypeConverters.toFloat())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())

    #
    # Getters
    #
    def getMaxIterations(self):
        """Read ``maxIterations`` through ``getOrDefault``."""
        return self.getOrDefault(self.maxIterations)

    def getStandardize(self):
        """Read ``standardize`` through ``getOrDefault``."""
        return self.getOrDefault(self.standardize)

    def getInit(self):
        """Read ``init`` through ``getOrDefault``."""
        return self.getOrDefault(self.init)

    def getUserPoints(self):
        """Read ``userPoints`` through ``getOrDefault``."""
        return self.getOrDefault(self.userPoints)

    def getEstimateK(self):
        """Read ``estimateK`` through ``getOrDefault``."""
        return self.getOrDefault(self.estimateK)

    def getK(self):
        """Read ``k`` through ``getOrDefault``."""
        return self.getOrDefault(self.k)

    def getQuantileAlpha(self):
        """Read ``quantileAlpha`` through ``getOrDefault``."""
        return self.getOrDefault(self.quantileAlpha)

    def getTweediePower(self):
        """Read ``tweediePower`` through ``getOrDefault``."""
        return self.getOrDefault(self.tweediePower)

    def getMaxCategoricalLevels(self):
        """Read ``maxCategoricalLevels`` through ``getOrDefault``."""
        return self.getOrDefault(self.maxCategoricalLevels)

    def getIgnoredCols(self):
        """Read ``ignoredCols`` through ``getOrDefault``."""
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        """Read ``ignoreConstCols`` through ``getOrDefault``."""
        return self.getOrDefault(self.ignoreConstCols)

    def getScoreEachIteration(self):
        """Read ``scoreEachIteration`` through ``getOrDefault``."""
        return self.getOrDefault(self.scoreEachIteration)

    def getCustomDistributionFunc(self):
        """Read ``customDistributionFunc`` through ``getOrDefault``."""
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        """Read ``customMetricFunc`` through ``getOrDefault``."""
        return self.getOrDefault(self.customMetricFunc)

    def getExportCheckpointsDir(self):
        """Read ``exportCheckpointsDir`` through ``getOrDefault``."""
        return self.getOrDefault(self.exportCheckpointsDir)

    def getStoppingRounds(self):
        """Read ``stoppingRounds`` through ``getOrDefault``."""
        return self.getOrDefault(self.stoppingRounds)

    def getMaxRuntimeSecs(self):
        """Read ``maxRuntimeSecs`` through ``getOrDefault``."""
        return self.getOrDefault(self.maxRuntimeSecs)

    def getClusterSizeConstraints(self):
        """Read ``clusterSizeConstraints`` through ``getOrDefault``."""
        return self.getOrDefault(self.clusterSizeConstraints)

    def getStoppingTolerance(self):
        """Read ``stoppingTolerance`` through ``getOrDefault``."""
        return self.getOrDefault(self.stoppingTolerance)

    def getFoldAssignment(self):
        """Read ``foldAssignment`` through ``getOrDefault``."""
        return self.getOrDefault(self.foldAssignment)

    def getCategoricalEncoding(self):
        """Read ``categoricalEncoding`` through ``getOrDefault``."""
        return self.getOrDefault(self.categoricalEncoding)

    def getStoppingMetric(self):
        """Read ``stoppingMetric`` through ``getOrDefault``."""
        return self.getOrDefault(self.stoppingMetric)

    def getHuberAlpha(self):
        """Read ``huberAlpha`` through ``getOrDefault``."""
        return self.getOrDefault(self.huberAlpha)

    def getKeepCrossValidationModels(self):
        """Read ``keepCrossValidationModels`` through ``getOrDefault``."""
        return self.getOrDefault(self.keepCrossValidationModels)

    #
    # Setters
    #
    def setMaxIterations(self, value):
        """Forward ``value`` to ``_set`` as ``maxIterations``."""
        return self._set(maxIterations=value)

    def setStandardize(self, value):
        """Forward ``value`` to ``_set`` as ``standardize``."""
        return self._set(standardize=value)

    def setInit(self, value):
        """Forward ``value`` to ``_set`` as ``init``."""
        return self._set(init=value)

    def setUserPoints(self, value):
        """Forward ``value`` to ``_set`` as ``userPoints``."""
        return self._set(userPoints=value)

    def setEstimateK(self, value):
        """Forward ``value`` to ``_set`` as ``estimateK``."""
        return self._set(estimateK=value)

    def setK(self, value):
        """Forward ``value`` to ``_set`` as ``k``."""
        return self._set(k=value)

    def setQuantileAlpha(self, value):
        """Forward ``value`` to ``_set`` as ``quantileAlpha``."""
        return self._set(quantileAlpha=value)

    def setTweediePower(self, value):
        """Forward ``value`` to ``_set`` as ``tweediePower``."""
        return self._set(tweediePower=value)

    def setMaxCategoricalLevels(self, value):
        """Forward ``value`` to ``_set`` as ``maxCategoricalLevels``."""
        return self._set(maxCategoricalLevels=value)

    def setIgnoredCols(self, value):
        """Forward ``value`` to ``_set`` as ``ignoredCols``."""
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        """Forward ``value`` to ``_set`` as ``ignoreConstCols``."""
        return self._set(ignoreConstCols=value)

    def setScoreEachIteration(self, value):
        """Forward ``value`` to ``_set`` as ``scoreEachIteration``."""
        return self._set(scoreEachIteration=value)

    def setCustomDistributionFunc(self, value):
        """Forward ``value`` to ``_set`` as ``customDistributionFunc``."""
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        """Forward ``value`` to ``_set`` as ``customMetricFunc``."""
        return self._set(customMetricFunc=value)

    def setExportCheckpointsDir(self, value):
        """Forward ``value`` to ``_set`` as ``exportCheckpointsDir``."""
        return self._set(exportCheckpointsDir=value)

    def setStoppingRounds(self, value):
        """Forward ``value`` to ``_set`` as ``stoppingRounds``."""
        return self._set(stoppingRounds=value)

    def setMaxRuntimeSecs(self, value):
        """Forward ``value`` to ``_set`` as ``maxRuntimeSecs``."""
        return self._set(maxRuntimeSecs=value)

    def setClusterSizeConstraints(self, value):
        """Forward ``value`` to ``_set`` as ``clusterSizeConstraints``."""
        return self._set(clusterSizeConstraints=value)

    def setStoppingTolerance(self, value):
        """Forward ``value`` to ``_set`` as ``stoppingTolerance``."""
        return self._set(stoppingTolerance=value)

    def setFoldAssignment(self, value):
        """Forward ``value`` to ``_set`` as ``foldAssignment``."""
        return self._set(foldAssignment=value)

    def setCategoricalEncoding(self, value):
        """Forward ``value`` to ``_set`` as ``categoricalEncoding``."""
        return self._set(categoricalEncoding=value)

    def setStoppingMetric(self, value):
        """Forward ``value`` to ``_set`` as ``stoppingMetric``."""
        return self._set(stoppingMetric=value)

    def setHuberAlpha(self, value):
        """Forward ``value`` to ``_set`` as ``huberAlpha``."""
        return self._set(huberAlpha=value)

    def setKeepCrossValidationModels(self, value):
        """Forward ``value`` to ``_set`` as ``keepCrossValidationModels``."""
        return self._set(keepCrossValidationModels=value)
Пример #6
0
class H2OGBMParams(H2OSharedTreeParams, HasMonotoneConstraints,
                   HasQuantileAlpha):
    """Param declarations for GBM together with their getters and setters.

    Each param is declared exactly once. Every getter reads its param through
    ``getOrDefault``; every setter forwards the value to ``_set`` and returns
    that call's result.
    """

    ##
    # Param definitions
    ##
    learnRate = Param(Params._dummy(), "learnRate",
                      "Learning rate (from 0.0 to 1.0)",
                      H2OTypeConverters.toFloat())

    learnRateAnnealing = Param(
        Params._dummy(), "learnRateAnnealing",
        "Scale the learning rate by this factor after each tree (e.g., 0.99 or 0.999)",
        H2OTypeConverters.toFloat())

    colSampleRate = Param(Params._dummy(), "colSampleRate",
                          "Column sample rate (from 0.0 to 1.0)",
                          H2OTypeConverters.toFloat())

    maxAbsLeafnodePred = Param(
        Params._dummy(), "maxAbsLeafnodePred",
        "Maximum absolute value of a leaf node prediction",
        H2OTypeConverters.toFloat())

    predNoiseBandwidth = Param(
        Params._dummy(), "predNoiseBandwidth",
        "Bandwidth (sigma) of Gaussian multiplicative noise ~N(1,sigma) for tree node predictions",
        H2OTypeConverters.toFloat())

    classSamplingFactors = Param(
        Params._dummy(), "classSamplingFactors",
        "Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors "
        "will be automatically computed to obtain class balance during training. Requires balance_classes.",
        H2OTypeConverters.toNullableListFloat())

    checkConstantResponse = Param(
        Params._dummy(), "checkConstantResponse",
        "Check if response column is constant. If enabled, then an exception is thrown if the response column "
        "is a constant value.If disabled, then model will train regardless of the response column being a constant "
        "value or not.", H2OTypeConverters.toBoolean())

    customDistributionFunc = Param(
        Params._dummy(), "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    customMetricFunc = Param(
        Params._dummy(), "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    maxRuntimeSecs = Param(
        Params._dummy(), "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())

    foldAssignment = Param(
        Params._dummy(), "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$FoldAssignmentScheme"))

    exportCheckpointsDir = Param(
        Params._dummy(), "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())

    maxAfterBalanceSize = Param(
        Params._dummy(), "maxAfterBalanceSize",
        "Maximum relative size of the training data after balancing class counts (can be less than 1.0). "
        "Requires balance_classes.", H2OTypeConverters.toFloat())

    calibrateModel = Param(
        Params._dummy(), "calibrateModel",
        "Use Platt Scaling to calculate calibrated class probabilities. Calibration can provide more accurate "
        "estimates of class probabilities.", H2OTypeConverters.toBoolean())

    ignoredCols = Param(Params._dummy(), "ignoredCols",
                        "Names of columns to ignore for training.",
                        H2OTypeConverters.toNullableListString())

    ignoreConstCols = Param(Params._dummy(), "ignoreConstCols",
                            "Ignore constant columns.",
                            H2OTypeConverters.toBoolean())

    balanceClasses = Param(
        Params._dummy(), "balanceClasses",
        "Balance training data class counts via over/under-sampling (for imbalanced data).",
        H2OTypeConverters.toBoolean())

    huberAlpha = Param(
        Params._dummy(), "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).", H2OTypeConverters.toFloat())

    tweediePower = Param(
        Params._dummy(), "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())

    scoreEachIteration = Param(
        Params._dummy(), "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())

    categoricalEncoding = Param(
        Params._dummy(), "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$CategoricalEncodingScheme"))

    maxCategoricalLevels = Param(
        Params._dummy(), "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())

    keepCrossValidationModels = Param(
        Params._dummy(), "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())

    ##
    # Getters
    ##
    def getLearnRate(self):
        """Return ``learnRate`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.learnRate)

    def getLearnRateAnnealing(self):
        """Return ``learnRateAnnealing`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.learnRateAnnealing)

    def getColSampleRate(self):
        """Return ``colSampleRate`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.colSampleRate)

    def getMaxAbsLeafnodePred(self):
        """Return ``maxAbsLeafnodePred`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxAbsLeafnodePred)

    def getPredNoiseBandwidth(self):
        """Return ``predNoiseBandwidth`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.predNoiseBandwidth)

    def getClassSamplingFactors(self):
        """Return ``classSamplingFactors`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.classSamplingFactors)

    def getCheckConstantResponse(self):
        """Return ``checkConstantResponse`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.checkConstantResponse)

    def getCustomDistributionFunc(self):
        """Return ``customDistributionFunc`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        """Return ``customMetricFunc`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.customMetricFunc)

    def getMaxRuntimeSecs(self):
        """Return ``maxRuntimeSecs`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxRuntimeSecs)

    def getFoldAssignment(self):
        """Return ``foldAssignment`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.foldAssignment)

    def getExportCheckpointsDir(self):
        """Return ``exportCheckpointsDir`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.exportCheckpointsDir)

    def getMaxAfterBalanceSize(self):
        """Return ``maxAfterBalanceSize`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getCalibrateModel(self):
        """Return ``calibrateModel`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.calibrateModel)

    def getIgnoredCols(self):
        """Return ``ignoredCols`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        """Return ``ignoreConstCols`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.ignoreConstCols)

    def getBalanceClasses(self):
        """Return ``balanceClasses`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.balanceClasses)

    def getHuberAlpha(self):
        """Return ``huberAlpha`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.huberAlpha)

    def getTweediePower(self):
        """Return ``tweediePower`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.tweediePower)

    def getScoreEachIteration(self):
        """Return ``scoreEachIteration`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.scoreEachIteration)

    def getCategoricalEncoding(self):
        """Return ``categoricalEncoding`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.categoricalEncoding)

    def getMaxCategoricalLevels(self):
        """Return ``maxCategoricalLevels`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxCategoricalLevels)

    def getKeepCrossValidationModels(self):
        """Return ``keepCrossValidationModels`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.keepCrossValidationModels)

    ##
    # Setters
    ##
    def setLearnRate(self, value):
        """Pass ``value`` to ``_set`` as ``learnRate`` and return its result."""
        return self._set(learnRate=value)

    def setLearnRateAnnealing(self, value):
        """Pass ``value`` to ``_set`` as ``learnRateAnnealing`` and return its result."""
        return self._set(learnRateAnnealing=value)

    def setColSampleRate(self, value):
        """Pass ``value`` to ``_set`` as ``colSampleRate`` and return its result."""
        return self._set(colSampleRate=value)

    def setMaxAbsLeafnodePred(self, value):
        """Pass ``value`` to ``_set`` as ``maxAbsLeafnodePred`` and return its result."""
        return self._set(maxAbsLeafnodePred=value)

    def setPredNoiseBandwidth(self, value):
        """Pass ``value`` to ``_set`` as ``predNoiseBandwidth`` and return its result."""
        return self._set(predNoiseBandwidth=value)

    def setClassSamplingFactors(self, value):
        """Pass ``value`` to ``_set`` as ``classSamplingFactors`` and return its result."""
        return self._set(classSamplingFactors=value)

    def setCheckConstantResponse(self, value):
        """Pass ``value`` to ``_set`` as ``checkConstantResponse`` and return its result."""
        return self._set(checkConstantResponse=value)

    def setCustomDistributionFunc(self, value):
        """Pass ``value`` to ``_set`` as ``customDistributionFunc`` and return its result."""
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        """Pass ``value`` to ``_set`` as ``customMetricFunc`` and return its result."""
        return self._set(customMetricFunc=value)

    def setMaxRuntimeSecs(self, value):
        """Pass ``value`` to ``_set`` as ``maxRuntimeSecs`` and return its result."""
        return self._set(maxRuntimeSecs=value)

    def setFoldAssignment(self, value):
        """Pass ``value`` to ``_set`` as ``foldAssignment`` and return its result."""
        return self._set(foldAssignment=value)

    def setExportCheckpointsDir(self, value):
        """Pass ``value`` to ``_set`` as ``exportCheckpointsDir`` and return its result."""
        return self._set(exportCheckpointsDir=value)

    def setMaxAfterBalanceSize(self, value):
        """Pass ``value`` to ``_set`` as ``maxAfterBalanceSize`` and return its result."""
        return self._set(maxAfterBalanceSize=value)

    def setCalibrateModel(self, value):
        """Pass ``value`` to ``_set`` as ``calibrateModel`` and return its result."""
        return self._set(calibrateModel=value)

    def setIgnoredCols(self, value):
        """Pass ``value`` to ``_set`` as ``ignoredCols`` and return its result."""
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        """Pass ``value`` to ``_set`` as ``ignoreConstCols`` and return its result."""
        return self._set(ignoreConstCols=value)

    def setBalanceClasses(self, value):
        """Pass ``value`` to ``_set`` as ``balanceClasses`` and return its result."""
        return self._set(balanceClasses=value)

    def setHuberAlpha(self, value):
        """Pass ``value`` to ``_set`` as ``huberAlpha`` and return its result."""
        return self._set(huberAlpha=value)

    def setTweediePower(self, value):
        """Pass ``value`` to ``_set`` as ``tweediePower`` and return its result."""
        return self._set(tweediePower=value)

    def setScoreEachIteration(self, value):
        """Pass ``value`` to ``_set`` as ``scoreEachIteration`` and return its result."""
        return self._set(scoreEachIteration=value)

    def setCategoricalEncoding(self, value):
        """Pass ``value`` to ``_set`` as ``categoricalEncoding`` and return its result."""
        return self._set(categoricalEncoding=value)

    def setMaxCategoricalLevels(self, value):
        """Pass ``value`` to ``_set`` as ``maxCategoricalLevels`` and return its result."""
        return self._set(maxCategoricalLevels=value)

    def setKeepCrossValidationModels(self, value):
        """Pass ``value`` to ``_set`` as ``keepCrossValidationModels`` and return its result."""
        return self._set(keepCrossValidationModels=value)
Пример #7
0
class H2OAutoMLParams(H2OCommonSupervisedParams, HasMonotoneConstraints):
    """Param declarations for AutoML together with their getters and setters.

    Every getter reads its param through ``getOrDefault``; every setter
    forwards the value to ``_set`` and returns that call's result.

    NOTE(review): ``getTryMutations`` / ``setTryMutations`` refer to a
    ``tryMutations`` param that this class does not declare. They are kept for
    interface compatibility and fail with an explicit ``AttributeError`` unless
    the attribute is provided elsewhere (e.g. by a base class). Confirm whether
    the param should be restored or the accessors retired.
    """

    ##
    # Param definitions
    ##
    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Ignored column names",
        H2OTypeConverters.toNullableListString())

    includeAlgos = Param(
        Params._dummy(),
        "includeAlgos",
        "Algorithms to include when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo", True))

    excludeAlgos = Param(
        Params._dummy(),
        "excludeAlgos",
        "Algorithms to exclude when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo", True))

    projectName = Param(
        Params._dummy(),
        "projectName",
        "identifier for models that should be grouped together in the leaderboard "
        "(e.g., airlines and iris)",
        H2OTypeConverters.toNullableString())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum time in seconds for automl to be running",
        H2OTypeConverters.toFloat())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Stopping rounds",
        H2OTypeConverters.toInt())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Stopping tolerance",
        H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Stopping metric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    sortMetric = Param(
        Params._dummy(),
        "sortMetric",
        "Sort metric for the AutoML leaderboard",
        H2OTypeConverters.toEnumString("ai.h2o.sparkling.ml.utils.H2OAutoMLSortMetric"))

    balanceClasses = Param(
        Params._dummy(),
        "balanceClasses",
        "Balance classes",
        H2OTypeConverters.toBoolean())

    classSamplingFactors = Param(
        Params._dummy(),
        "classSamplingFactors",
        "Class sampling factors",
        H2OTypeConverters.toNullableListFloat())

    maxAfterBalanceSize = Param(
        Params._dummy(),
        "maxAfterBalanceSize",
        "Max after balance size",
        H2OTypeConverters.toFloat())

    keepCrossValidationPredictions = Param(
        Params._dummy(),
        "keepCrossValidationPredictions",
        "Keep cross validation predictions",
        H2OTypeConverters.toBoolean())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Keep cross validation models",
        H2OTypeConverters.toBoolean())

    keepCrossValidationFoldAssignment = Param(
        Params._dummy(),
        "keepCrossValidationFoldAssignment",
        "Whether to keep cross-validation assignments.",
        H2OTypeConverters.toBoolean())

    maxModels = Param(
        Params._dummy(),
        "maxModels",
        "Max models to train in AutoML",
        H2OTypeConverters.toInt())

    maxRuntimeSecsPerModel = Param(
        Params._dummy(),
        "maxRuntimeSecsPerModel",
        "Maximum time to spend on each individual model (optional).",
        H2OTypeConverters.toFloat())

    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Path to a directory where every generated model will be stored.",
        H2OTypeConverters.toNullableString())

    exploitationRatio = Param(
        Params._dummy(),
        "exploitationRatio",
        "The budget ratio (between 0 and 1) dedicated to the exploitation (vs exploration) phase.",
        H2OTypeConverters.toFloat())

    ##
    # Getters
    ##
    def getIgnoredCols(self):
        """Return ``ignoredCols`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.ignoredCols)

    def getTryMutations(self):
        """Return ``tryMutations`` as resolved by ``getOrDefault``.

        :raises AttributeError: if no ``tryMutations`` param is available on
            this object (this class itself does not declare one).
        """
        if not hasattr(self, "tryMutations"):
            raise AttributeError(
                "%s does not declare a 'tryMutations' param, so getTryMutations() "
                "has nothing to read" % type(self).__name__)
        return self.getOrDefault(self.tryMutations)

    def getExcludeAlgos(self):
        """Return ``excludeAlgos`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.excludeAlgos)

    def getIncludeAlgos(self):
        """Return ``includeAlgos`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.includeAlgos)

    def getProjectName(self):
        """Return ``projectName`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.projectName)

    def getMaxRuntimeSecs(self):
        """Return ``maxRuntimeSecs`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxRuntimeSecs)

    def getStoppingRounds(self):
        """Return ``stoppingRounds`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        """Return ``stoppingTolerance`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        """Return ``stoppingMetric`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.stoppingMetric)

    def getSortMetric(self):
        """Return ``sortMetric`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.sortMetric)

    def getBalanceClasses(self):
        """Return ``balanceClasses`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.balanceClasses)

    def getClassSamplingFactors(self):
        """Return ``classSamplingFactors`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.classSamplingFactors)

    def getMaxAfterBalanceSize(self):
        """Return ``maxAfterBalanceSize`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getKeepCrossValidationPredictions(self):
        """Return ``keepCrossValidationPredictions`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.keepCrossValidationPredictions)

    def getKeepCrossValidationModels(self):
        """Return ``keepCrossValidationModels`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.keepCrossValidationModels)

    def getKeepCrossValidationFoldAssignment(self):
        """Return ``keepCrossValidationFoldAssignment`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.keepCrossValidationFoldAssignment)

    def getMaxModels(self):
        """Return ``maxModels`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxModels)

    def getMaxRuntimeSecsPerModel(self):
        """Return ``maxRuntimeSecsPerModel`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.maxRuntimeSecsPerModel)

    def getExportCheckpointsDir(self):
        """Return ``exportCheckpointsDir`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.exportCheckpointsDir)

    def getExploitationRatio(self):
        """Return ``exploitationRatio`` as resolved by ``getOrDefault``."""
        return self.getOrDefault(self.exploitationRatio)

    ##
    # Setters
    ##
    def setIgnoredCols(self, value):
        """Pass ``value`` to ``_set`` as ``ignoredCols`` and return its result."""
        return self._set(ignoredCols=value)

    def setTryMutations(self, value):
        """Pass ``value`` to ``_set`` as ``tryMutations`` and return its result.

        :raises AttributeError: if no ``tryMutations`` param is available on
            this object (this class itself does not declare one).
        """
        if not hasattr(self, "tryMutations"):
            raise AttributeError(
                "%s does not declare a 'tryMutations' param, so setTryMutations() "
                "has nothing to set" % type(self).__name__)
        return self._set(tryMutations=value)

    def setIncludeAlgos(self, value):
        """Pass ``value`` to ``_set`` as ``includeAlgos`` and return its result."""
        return self._set(includeAlgos=value)

    def setExcludeAlgos(self, value):
        """Pass ``value`` to ``_set`` as ``excludeAlgos`` and return its result."""
        return self._set(excludeAlgos=value)

    def setProjectName(self, value):
        """Pass ``value`` to ``_set`` as ``projectName`` and return its result."""
        return self._set(projectName=value)

    def setMaxRuntimeSecs(self, value):
        """Pass ``value`` to ``_set`` as ``maxRuntimeSecs`` and return its result."""
        return self._set(maxRuntimeSecs=value)

    def setStoppingRounds(self, value):
        """Pass ``value`` to ``_set`` as ``stoppingRounds`` and return its result."""
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        """Pass ``value`` to ``_set`` as ``stoppingTolerance`` and return its result."""
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        """Pass ``value`` to ``_set`` as ``stoppingMetric`` and return its result."""
        return self._set(stoppingMetric=value)

    def setSortMetric(self, value):
        """Pass ``value`` to ``_set`` as ``sortMetric`` and return its result."""
        return self._set(sortMetric=value)

    def setBalanceClasses(self, value):
        """Pass ``value`` to ``_set`` as ``balanceClasses`` and return its result."""
        return self._set(balanceClasses=value)

    def setClassSamplingFactors(self, value):
        """Pass ``value`` to ``_set`` as ``classSamplingFactors`` and return its result."""
        return self._set(classSamplingFactors=value)

    def setMaxAfterBalanceSize(self, value):
        """Pass ``value`` to ``_set`` as ``maxAfterBalanceSize`` and return its result."""
        return self._set(maxAfterBalanceSize=value)

    def setKeepCrossValidationPredictions(self, value):
        """Pass ``value`` to ``_set`` as ``keepCrossValidationPredictions``."""
        return self._set(keepCrossValidationPredictions=value)

    def setKeepCrossValidationModels(self, value):
        """Pass ``value`` to ``_set`` as ``keepCrossValidationModels``."""
        return self._set(keepCrossValidationModels=value)

    def setKeepCrossValidationFoldAssignment(self, value):
        """Pass ``value`` to ``_set`` as ``keepCrossValidationFoldAssignment``."""
        return self._set(keepCrossValidationFoldAssignment=value)

    def setMaxModels(self, value):
        """Pass ``value`` to ``_set`` as ``maxModels`` and return its result."""
        return self._set(maxModels=value)

    def setMaxRuntimeSecsPerModel(self, value):
        """Pass ``value`` to ``_set`` as ``maxRuntimeSecsPerModel`` and return its result."""
        return self._set(maxRuntimeSecsPerModel=value)

    def setExportCheckpointsDir(self, value):
        """Pass ``value`` to ``_set`` as ``exportCheckpointsDir`` and return its result."""
        return self._set(exportCheckpointsDir=value)

    def setExploitationRatio(self, value):
        """Pass ``value`` to ``_set`` as ``exploitationRatio`` and return its result."""
        return self._set(exploitationRatio=value)
class H2OTargetEncoderParams(Params):
    """Param declarations for the target encoder together with their getters.

    All getters read through ``getOrDefault``. ``getOutputCols`` additionally
    derives names from the input columns when no output columns are available.
    """

    ##
    # Param definitions
    ##
    foldCol = Param(
        Params._dummy(),
        "foldCol",
        "Fold column name",
        H2OTypeConverters.toNullableString())

    labelCol = Param(
        Params._dummy(),
        "labelCol",
        "Label column name",
        H2OTypeConverters.toString())

    inputCols = Param(
        Params._dummy(),
        "inputCols",
        "Names of columns that will be transformed",
        H2OTypeConverters.toListString())

    outputCols = Param(
        Params._dummy(),
        "outputCols",
        "Names of columns representing the result of target encoding",
        H2OTypeConverters.toListString())

    holdoutStrategy = Param(
        Params._dummy(),
        "holdoutStrategy",
        """A strategy deciding what records will be excluded when calculating the target average on the training dataset.
           Options:
            None        - All rows are considered for the calculation
            LeaveOneOut - All rows except the row the calculation is made for
            KFold       - Only out-of-fold data is considered (The option requires foldCol to be set.""",
        H2OTypeConverters.toEnumString("ai.h2o.targetencoding.TargetEncoder$DataLeakageHandlingStrategy"))

    blendedAvgEnabled = Param(
        Params._dummy(),
        "blendedAvgEnabled",
        "If set, the target average becomes a weighted average of the posterior average for a given "
        "categorical level and the prior average of the target. The weight is determined by the size "
        "of the given group that the row belongs to. By default, the blended average is disabled.",
        H2OTypeConverters.toBoolean())

    blendedAvgInflectionPoint = Param(
        Params._dummy(),
        "blendedAvgInflectionPoint",
        "A parameter of the blended average. The bigger number is set, the groups relatively bigger to the "
        "overall data set size will consider the global target value as a component in the weighted average. "
        "The default value is 10.",
        H2OTypeConverters.toFloat())

    blendedAvgSmoothing = Param(
        Params._dummy(),
        "blendedAvgSmoothing",
        "A parameter of blended average. Controls the rate of transition between a group target value "
        "and a global target value. The default value is 20.",
        H2OTypeConverters.toFloat())

    noise = Param(
        Params._dummy(),
        "noise",
        "Amount of random noise added to output values. The default value is 0.01",
        H2OTypeConverters.toFloat())

    noiseSeed = Param(
        Params._dummy(),
        "noiseSeed",
        "A seed of the generator producing the random noise",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getFoldCol(self):
        """Read ``foldCol`` through ``getOrDefault``."""
        return self.getOrDefault(self.foldCol)

    def getLabelCol(self):
        """Read ``labelCol`` through ``getOrDefault``."""
        return self.getOrDefault(self.labelCol)

    def getInputCols(self):
        """Read ``inputCols`` through ``getOrDefault``."""
        return self.getOrDefault(self.inputCols)

    def getOutputCols(self):
        """Return the output column names.

        When ``getOrDefault`` yields a falsy value for ``outputCols``, the
        names are built by appending ``"_te"`` to each input column name.
        """
        configured = self.getOrDefault(self.outputCols)
        if configured:
            return configured
        return [name + "_te" for name in self.getInputCols()]

    def getHoldoutStrategy(self):
        """Read ``holdoutStrategy`` through ``getOrDefault``."""
        return self.getOrDefault(self.holdoutStrategy)

    def getBlendedAvgEnabled(self):
        """Read ``blendedAvgEnabled`` through ``getOrDefault``."""
        return self.getOrDefault(self.blendedAvgEnabled)

    def getBlendedAvgInflectionPoint(self):
        """Read ``blendedAvgInflectionPoint`` through ``getOrDefault``."""
        return self.getOrDefault(self.blendedAvgInflectionPoint)

    def getBlendedAvgSmoothing(self):
        """Read ``blendedAvgSmoothing`` through ``getOrDefault``."""
        return self.getOrDefault(self.blendedAvgSmoothing)

    def getNoise(self):
        """Read ``noise`` through ``getOrDefault``."""
        return self.getOrDefault(self.noise)

    def getNoiseSeed(self):
        """Read ``noiseSeed`` through ``getOrDefault``."""
        return self.getOrDefault(self.noiseSeed)
Пример #9
0
class H2OAlgoCommonParams:
    """Param declarations plus getters/setters for a handful of algorithm params.

    Declares ``modelId``, ``keepCrossValidationPredictions``,
    ``keepCrossValidationFoldAssignment``, ``parallelizeCrossValidation`` and
    ``distribution``. The accessors call ``getOrDefault`` and ``_set``, which
    are not defined in this class and therefore must be provided by whatever
    class this one is combined with.
    """

    ##
    # Param definitions
    ##
    modelId = Param(
        Params._dummy(),
        "modelId",
        "An unique identifier of a trained model. If the id already exists, a number will be appended to "
        "ensure uniqueness.",
        H2OTypeConverters.toNullableString(),
    )

    keepCrossValidationPredictions = Param(
        Params._dummy(),
        "keepCrossValidationPredictions",
        "Whether to keep the predictions of the cross-validation models",
        H2OTypeConverters.toBoolean(),
    )

    keepCrossValidationFoldAssignment = Param(
        Params._dummy(),
        "keepCrossValidationFoldAssignment",
        "Whether to keep the cross-validation fold assignment",
        H2OTypeConverters.toBoolean(),
    )

    parallelizeCrossValidation = Param(
        Params._dummy(),
        "parallelizeCrossValidation",
        "Allow parallel training of cross-validation models",
        H2OTypeConverters.toBoolean(),
    )

    distribution = Param(
        Params._dummy(),
        "distribution",
        "Distribution function",
        H2OTypeConverters.toEnumString("hex.genmodel.utils.DistributionFamily"),
    )

    ##
    # Getters
    ##
    def getModelId(self):
        """Return the ``modelId`` param value resolved by ``getOrDefault``."""
        param = self.modelId
        return self.getOrDefault(param)

    def getKeepCrossValidationPredictions(self):
        """Return the ``keepCrossValidationPredictions`` value resolved by ``getOrDefault``."""
        param = self.keepCrossValidationPredictions
        return self.getOrDefault(param)

    def getKeepCrossValidationFoldAssignment(self):
        """Return the ``keepCrossValidationFoldAssignment`` value resolved by ``getOrDefault``."""
        param = self.keepCrossValidationFoldAssignment
        return self.getOrDefault(param)

    def getParallelizeCrossValidation(self):
        """Return the ``parallelizeCrossValidation`` value resolved by ``getOrDefault``."""
        param = self.parallelizeCrossValidation
        return self.getOrDefault(param)

    def getDistribution(self):
        """Return the ``distribution`` param value resolved by ``getOrDefault``."""
        param = self.distribution
        return self.getOrDefault(param)

    ##
    # Setters
    ##
    def setModelId(self, value):
        """Assign ``value`` to ``modelId`` via ``_set`` and return its result."""
        return self._set(modelId=value)

    def setKeepCrossValidationPredictions(self, value):
        """Assign ``value`` to ``keepCrossValidationPredictions`` via ``_set``."""
        return self._set(keepCrossValidationPredictions=value)

    def setKeepCrossValidationFoldAssignment(self, value):
        """Assign ``value`` to ``keepCrossValidationFoldAssignment`` via ``_set``."""
        return self._set(keepCrossValidationFoldAssignment=value)

    def setParallelizeCrossValidation(self, value):
        """Assign ``value`` to ``parallelizeCrossValidation`` via ``_set``."""
        return self._set(parallelizeCrossValidation=value)

    def setDistribution(self, value):
        """Assign ``value`` to ``distribution`` via ``_set`` and return its result."""
        return self._set(distribution=value)
# Example #10
# 0
class H2OGLMParams(H2OAlgoSupervisedParams):
    """Param declarations with one getter and one setter per param.

    Every getter resolves its param through ``getOrDefault`` and every setter
    stores the value through ``_set``; neither helper is defined in this
    class, so both are expected to come from the base class hierarchy.
    """

    ##
    # Param definitions
    ##
    standardize = Param(
        Params._dummy(),
        "standardize",
        "standardize",
        H2OTypeConverters.toBoolean(),
    )

    family = Param(
        Params._dummy(),
        "family",
        "family",
        H2OTypeConverters.toEnumString("hex.glm.GLMModel$GLMParameters$Family"),
    )

    link = Param(
        Params._dummy(),
        "link",
        "link",
        H2OTypeConverters.toEnumString("hex.glm.GLMModel$GLMParameters$Link"),
    )

    solver = Param(
        Params._dummy(),
        "solver",
        "solver",
        H2OTypeConverters.toEnumString("hex.glm.GLMModel$GLMParameters$Solver"),
    )

    tweedieVariancePower = Param(
        Params._dummy(),
        "tweedieVariancePower",
        "Tweedie variance power",
        H2OTypeConverters.toFloat(),
    )

    tweedieLinkPower = Param(
        Params._dummy(),
        "tweedieLinkPower",
        "Tweedie link power",
        H2OTypeConverters.toFloat(),
    )

    alphaValue = Param(
        Params._dummy(),
        "alphaValue",
        "alphaValue",
        H2OTypeConverters.toNullableListFloat(),
    )

    lambdaValue = Param(
        Params._dummy(),
        "lambdaValue",
        "lambdaValue",
        H2OTypeConverters.toNullableListFloat(),
    )

    missingValuesHandling = Param(
        Params._dummy(),
        "missingValuesHandling",
        "missingValuesHandling",
        H2OTypeConverters.toEnumString(
            "hex.deeplearning.DeepLearningModel$DeepLearningParameters$MissingValuesHandling"
        ),
    )

    prior = Param(
        Params._dummy(),
        "prior",
        "prior",
        H2OTypeConverters.toFloat(),
    )

    lambdaSearch = Param(
        Params._dummy(),
        "lambdaSearch",
        "lambda search",
        H2OTypeConverters.toBoolean(),
    )

    nlambdas = Param(
        Params._dummy(),
        "nlambdas",
        "nlambdas",
        H2OTypeConverters.toInt(),
    )

    nonNegative = Param(
        Params._dummy(),
        "nonNegative",
        "nonNegative",
        H2OTypeConverters.toBoolean(),
    )

    lambdaMinRatio = Param(
        Params._dummy(),
        "lambdaMinRatio",
        "lambdaMinRatio",
        H2OTypeConverters.toFloat(),
    )

    maxIterations = Param(
        Params._dummy(),
        "maxIterations",
        "maxIterations",
        H2OTypeConverters.toInt(),
    )

    intercept = Param(
        Params._dummy(),
        "intercept",
        "intercept",
        H2OTypeConverters.toBoolean(),
    )

    betaEpsilon = Param(
        Params._dummy(),
        "betaEpsilon",
        "betaEpsilon",
        H2OTypeConverters.toFloat(),
    )

    objectiveEpsilon = Param(
        Params._dummy(),
        "objectiveEpsilon",
        "objectiveEpsilon",
        H2OTypeConverters.toFloat(),
    )

    gradientEpsilon = Param(
        Params._dummy(),
        "gradientEpsilon",
        "gradientEpsilon",
        H2OTypeConverters.toFloat(),
    )

    objReg = Param(
        Params._dummy(),
        "objReg",
        "objReg",
        H2OTypeConverters.toFloat(),
    )

    computePValues = Param(
        Params._dummy(),
        "computePValues",
        "computePValues",
        H2OTypeConverters.toBoolean(),
    )

    removeCollinearCols = Param(
        Params._dummy(),
        "removeCollinearCols",
        "removeCollinearCols",
        H2OTypeConverters.toBoolean(),
    )

    interactions = Param(
        Params._dummy(),
        "interactions",
        "interactions",
        H2OTypeConverters.toNullableListString(),
    )

    # Declared without a type converter; the setter validates the value itself.
    interactionPairs = Param(
        Params._dummy(),
        "interactionPairs",
        "interactionPairs",
    )

    earlyStopping = Param(
        Params._dummy(),
        "earlyStopping",
        "earlyStopping",
        H2OTypeConverters.toBoolean(),
    )

    balanceClasses = Param(
        Params._dummy(),
        "balanceClasses",
        "Balance training data class counts via over/under-sampling (for imbalanced data).",
        H2OTypeConverters.toBoolean(),
    )

    quantileAlpha = Param(
        Params._dummy(),
        "quantileAlpha",
        "Desired quantile for Quantile regression, must be between 0 and 1.",
        H2OTypeConverters.toFloat(),
    )

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and"
        " anonomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used"
        " in GBM and DRF with the Python client.",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"),
    )

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Relative tolerance for metric-based stopping criterion (stop if relative improvement is not"
        " at least this much)",
        H2OTypeConverters.toFloat(),
    )

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of"
        " the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable)",
        H2OTypeConverters.toInt(),
    )

    categoricalEncoding = Param(
        Params._dummy(),
        "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$CategoricalEncodingScheme"
        ),
    )

    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString(),
    )

    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Names of columns to ignore for training.",
        H2OTypeConverters.toNullableListString(),
    )

    ignoreConstCols = Param(
        Params._dummy(),
        "ignoreConstCols",
        "Ignore constant columns.",
        H2OTypeConverters.toBoolean(),
    )

    classSamplingFactors = Param(
        Params._dummy(),
        "classSamplingFactors",
        "Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors "
        "will be automatically computed to obtain class balance during training. Requires balance_classes.",
        H2OTypeConverters.toNullableListFloat(),
    )

    maxAfterBalanceSize = Param(
        Params._dummy(),
        "maxAfterBalanceSize",
        "Maximum relative size of the training data after balancing class counts (can be less than 1.0). "
        "Requires balance_classes.",
        H2OTypeConverters.toFloat(),
    )

    maxCategoricalLevels = Param(
        Params._dummy(),
        "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt(),
    )

    HGLM = Param(
        Params._dummy(),
        "HGLM",
        "If set to true, will return HGLM model.  Otherwise, normal GLM model will be returned",
        H2OTypeConverters.toBoolean(),
    )

    customDistributionFunc = Param(
        Params._dummy(),
        "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString(),
    )

    customMetricFunc = Param(
        Params._dummy(),
        "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString(),
    )

    startval = Param(
        Params._dummy(),
        "startval",
        "double array to initialize fixed and random coefficients for HGLM.",
        H2OTypeConverters.toNullableListFloat(),
    )

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean(),
    )

    theta = Param(
        Params._dummy(),
        "theta",
        "Theta",
        H2OTypeConverters.toFloat(),
    )

    scoreEachIteration = Param(
        Params._dummy(),
        "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean(),
    )

    tweediePower = Param(
        Params._dummy(),
        "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat(),
    )

    huberAlpha = Param(
        Params._dummy(),
        "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).",
        H2OTypeConverters.toFloat(),
    )

    maxActivePredictors = Param(
        Params._dummy(),
        "maxActivePredictors",
        "Maximum number of active predictors during computation. Use as a stopping criterion to prevent expensive "
        "model building with many predictors. Default indicates: If the IRLSM solver is used, the value of "
        "max_active_predictors is set to 5000 otherwise it is set to 100000000.",
        H2OTypeConverters.toInt(),
    )

    foldAssignment = Param(
        Params._dummy(),
        "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString("hex.Model$Parameters$FoldAssignmentScheme"),
    )

    calcLike = Param(
        Params._dummy(),
        "calcLike",
        "if true, will return likelihood function value for HGLM.",
        H2OTypeConverters.toBoolean(),
    )

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat(),
    )

    ##
    # Getters
    ##
    def getStandardize(self):
        """Return the ``standardize`` value resolved by ``getOrDefault``."""
        param = self.standardize
        return self.getOrDefault(param)

    def getFamily(self):
        """Return the ``family`` value resolved by ``getOrDefault``."""
        param = self.family
        return self.getOrDefault(param)

    def getLink(self):
        """Return the ``link`` value resolved by ``getOrDefault``."""
        param = self.link
        return self.getOrDefault(param)

    def getSolver(self):
        """Return the ``solver`` value resolved by ``getOrDefault``."""
        param = self.solver
        return self.getOrDefault(param)

    def getTweedieVariancePower(self):
        """Return the ``tweedieVariancePower`` value resolved by ``getOrDefault``."""
        param = self.tweedieVariancePower
        return self.getOrDefault(param)

    def getTweedieLinkPower(self):
        """Return the ``tweedieLinkPower`` value resolved by ``getOrDefault``."""
        param = self.tweedieLinkPower
        return self.getOrDefault(param)

    def getAlphaValue(self):
        """Return the ``alphaValue`` value resolved by ``getOrDefault``."""
        param = self.alphaValue
        return self.getOrDefault(param)

    def getLambdaValue(self):
        """Return the ``lambdaValue`` value resolved by ``getOrDefault``."""
        param = self.lambdaValue
        return self.getOrDefault(param)

    def getMissingValuesHandling(self):
        """Return the ``missingValuesHandling`` value resolved by ``getOrDefault``."""
        param = self.missingValuesHandling
        return self.getOrDefault(param)

    def getPrior(self):
        """Return the ``prior`` value resolved by ``getOrDefault``."""
        param = self.prior
        return self.getOrDefault(param)

    def getLambdaSearch(self):
        """Return the ``lambdaSearch`` value resolved by ``getOrDefault``."""
        param = self.lambdaSearch
        return self.getOrDefault(param)

    def getNlambdas(self):
        """Return the ``nlambdas`` value resolved by ``getOrDefault``."""
        param = self.nlambdas
        return self.getOrDefault(param)

    def getNonNegative(self):
        """Return the ``nonNegative`` value resolved by ``getOrDefault``."""
        param = self.nonNegative
        return self.getOrDefault(param)

    def getLambdaMinRatio(self):
        """Return the ``lambdaMinRatio`` value resolved by ``getOrDefault``."""
        param = self.lambdaMinRatio
        return self.getOrDefault(param)

    def getMaxIterations(self):
        """Return the ``maxIterations`` value resolved by ``getOrDefault``."""
        param = self.maxIterations
        return self.getOrDefault(param)

    def getIntercept(self):
        """Return the ``intercept`` value resolved by ``getOrDefault``."""
        param = self.intercept
        return self.getOrDefault(param)

    def getBetaEpsilon(self):
        """Return the ``betaEpsilon`` value resolved by ``getOrDefault``."""
        param = self.betaEpsilon
        return self.getOrDefault(param)

    def getObjectiveEpsilon(self):
        """Return the ``objectiveEpsilon`` value resolved by ``getOrDefault``."""
        param = self.objectiveEpsilon
        return self.getOrDefault(param)

    def getGradientEpsilon(self):
        """Return the ``gradientEpsilon`` value resolved by ``getOrDefault``."""
        param = self.gradientEpsilon
        return self.getOrDefault(param)

    def getObjReg(self):
        """Return the ``objReg`` value resolved by ``getOrDefault``."""
        param = self.objReg
        return self.getOrDefault(param)

    def getComputePValues(self):
        """Return the ``computePValues`` value resolved by ``getOrDefault``."""
        param = self.computePValues
        return self.getOrDefault(param)

    def getRemoveCollinearCols(self):
        """Return the ``removeCollinearCols`` value resolved by ``getOrDefault``."""
        param = self.removeCollinearCols
        return self.getOrDefault(param)

    def getInteractions(self):
        """Return the ``interactions`` value resolved by ``getOrDefault``."""
        param = self.interactions
        return self.getOrDefault(param)

    def getInteractionPairs(self):
        """Return the ``interactionPairs`` value resolved by ``getOrDefault``."""
        param = self.interactionPairs
        return self.getOrDefault(param)

    def getEarlyStopping(self):
        """Return the ``earlyStopping`` value resolved by ``getOrDefault``."""
        param = self.earlyStopping
        return self.getOrDefault(param)

    def getBalanceClasses(self):
        """Return the ``balanceClasses`` value resolved by ``getOrDefault``."""
        param = self.balanceClasses
        return self.getOrDefault(param)

    def getQuantileAlpha(self):
        """Return the ``quantileAlpha`` value resolved by ``getOrDefault``."""
        param = self.quantileAlpha
        return self.getOrDefault(param)

    def getStoppingMetric(self):
        """Return the ``stoppingMetric`` value resolved by ``getOrDefault``."""
        param = self.stoppingMetric
        return self.getOrDefault(param)

    def getStoppingTolerance(self):
        """Return the ``stoppingTolerance`` value resolved by ``getOrDefault``."""
        param = self.stoppingTolerance
        return self.getOrDefault(param)

    def getStoppingRounds(self):
        """Return the ``stoppingRounds`` value resolved by ``getOrDefault``."""
        param = self.stoppingRounds
        return self.getOrDefault(param)

    def getCategoricalEncoding(self):
        """Return the ``categoricalEncoding`` value resolved by ``getOrDefault``."""
        param = self.categoricalEncoding
        return self.getOrDefault(param)

    def getExportCheckpointsDir(self):
        """Return the ``exportCheckpointsDir`` value resolved by ``getOrDefault``."""
        param = self.exportCheckpointsDir
        return self.getOrDefault(param)

    def getIgnoredCols(self):
        """Return the ``ignoredCols`` value resolved by ``getOrDefault``."""
        param = self.ignoredCols
        return self.getOrDefault(param)

    def getIgnoreConstCols(self):
        """Return the ``ignoreConstCols`` value resolved by ``getOrDefault``."""
        param = self.ignoreConstCols
        return self.getOrDefault(param)

    def getClassSamplingFactors(self):
        """Return the ``classSamplingFactors`` value resolved by ``getOrDefault``."""
        param = self.classSamplingFactors
        return self.getOrDefault(param)

    def getMaxCategoricalLevels(self):
        """Return the ``maxCategoricalLevels`` value resolved by ``getOrDefault``."""
        param = self.maxCategoricalLevels
        return self.getOrDefault(param)

    def getMaxAfterBalanceSize(self):
        """Return the ``maxAfterBalanceSize`` value resolved by ``getOrDefault``."""
        param = self.maxAfterBalanceSize
        return self.getOrDefault(param)

    def getHGLM(self):
        """Return the ``HGLM`` value resolved by ``getOrDefault``."""
        param = self.HGLM
        return self.getOrDefault(param)

    def getCustomDistributionFunc(self):
        """Return the ``customDistributionFunc`` value resolved by ``getOrDefault``."""
        param = self.customDistributionFunc
        return self.getOrDefault(param)

    def getCustomMetricFunc(self):
        """Return the ``customMetricFunc`` value resolved by ``getOrDefault``."""
        param = self.customMetricFunc
        return self.getOrDefault(param)

    def getStartval(self):
        """Return the ``startval`` value resolved by ``getOrDefault``."""
        param = self.startval
        return self.getOrDefault(param)

    def getKeepCrossValidationModels(self):
        """Return the ``keepCrossValidationModels`` value resolved by ``getOrDefault``."""
        param = self.keepCrossValidationModels
        return self.getOrDefault(param)

    def getTheta(self):
        """Return the ``theta`` value resolved by ``getOrDefault``."""
        param = self.theta
        return self.getOrDefault(param)

    def getScoreEachIteration(self):
        """Return the ``scoreEachIteration`` value resolved by ``getOrDefault``."""
        param = self.scoreEachIteration
        return self.getOrDefault(param)

    def getTweediePower(self):
        """Return the ``tweediePower`` value resolved by ``getOrDefault``."""
        param = self.tweediePower
        return self.getOrDefault(param)

    def getHuberAlpha(self):
        """Return the ``huberAlpha`` value resolved by ``getOrDefault``."""
        param = self.huberAlpha
        return self.getOrDefault(param)

    def getMaxActivePredictors(self):
        """Return the ``maxActivePredictors`` value resolved by ``getOrDefault``."""
        param = self.maxActivePredictors
        return self.getOrDefault(param)

    def getFoldAssignment(self):
        """Return the ``foldAssignment`` value resolved by ``getOrDefault``."""
        param = self.foldAssignment
        return self.getOrDefault(param)

    def getCalcLike(self):
        """Return the ``calcLike`` value resolved by ``getOrDefault``."""
        param = self.calcLike
        return self.getOrDefault(param)

    def getMaxRuntimeSecs(self):
        """Return the ``maxRuntimeSecs`` value resolved by ``getOrDefault``."""
        param = self.maxRuntimeSecs
        return self.getOrDefault(param)

    ##
    # Setters
    ##
    def setStandardize(self, value):
        """Assign ``value`` to ``standardize`` via ``_set`` and return its result."""
        return self._set(standardize=value)

    def setFamily(self, value):
        """Assign ``value`` to ``family`` via ``_set`` and return its result."""
        return self._set(family=value)

    def setLink(self, value):
        """Assign ``value`` to ``link`` via ``_set`` and return its result."""
        return self._set(link=value)

    def setSolver(self, value):
        """Assign ``value`` to ``solver`` via ``_set`` and return its result."""
        return self._set(solver=value)

    def setTweedieVariancePower(self, value):
        """Assign ``value`` to ``tweedieVariancePower`` via ``_set``."""
        return self._set(tweedieVariancePower=value)

    def setTweedieLinkPower(self, value):
        """Assign ``value`` to ``tweedieLinkPower`` via ``_set``."""
        return self._set(tweedieLinkPower=value)

    def setAlphaValue(self, value):
        """Assign ``value`` to ``alphaValue`` via ``_set`` and return its result."""
        return self._set(alphaValue=value)

    def setLambdaValue(self, value):
        """Assign ``value`` to ``lambdaValue`` via ``_set`` and return its result."""
        return self._set(lambdaValue=value)

    def setMissingValuesHandling(self, value):
        """Assign ``value`` to ``missingValuesHandling`` via ``_set``."""
        return self._set(missingValuesHandling=value)

    def setPrior(self, value):
        """Assign ``value`` to ``prior`` via ``_set`` and return its result."""
        return self._set(prior=value)

    def setLambdaSearch(self, value):
        """Assign ``value`` to ``lambdaSearch`` via ``_set`` and return its result."""
        return self._set(lambdaSearch=value)

    def setNlambdas(self, value):
        """Assign ``value`` to ``nlambdas`` via ``_set`` and return its result."""
        return self._set(nlambdas=value)

    def setNonNegative(self, value):
        """Assign ``value`` to ``nonNegative`` via ``_set`` and return its result."""
        return self._set(nonNegative=value)

    def setLambdaMinRatio(self, value):
        """Assign ``value`` to ``lambdaMinRatio`` via ``_set``."""
        return self._set(lambdaMinRatio=value)

    def setMaxIterations(self, value):
        """Assign ``value`` to ``maxIterations`` via ``_set``."""
        return self._set(maxIterations=value)

    def setIntercept(self, value):
        """Assign ``value`` to ``intercept`` via ``_set`` and return its result."""
        return self._set(intercept=value)

    def setBetaEpsilon(self, value):
        """Assign ``value`` to ``betaEpsilon`` via ``_set`` and return its result."""
        return self._set(betaEpsilon=value)

    def setObjectiveEpsilon(self, value):
        """Assign ``value`` to ``objectiveEpsilon`` via ``_set``."""
        return self._set(objectiveEpsilon=value)

    def setGradientEpsilon(self, value):
        """Assign ``value`` to ``gradientEpsilon`` via ``_set``."""
        return self._set(gradientEpsilon=value)

    def setObjReg(self, value):
        """Assign ``value`` to ``objReg`` via ``_set`` and return its result."""
        return self._set(objReg=value)

    def setComputePValues(self, value):
        """Assign ``value`` to ``computePValues`` via ``_set``."""
        return self._set(computePValues=value)

    def setRemoveCollinearCols(self, value):
        """Assign ``value`` to ``removeCollinearCols`` via ``_set``."""
        return self._set(removeCollinearCols=value)

    def setInteractions(self, value):
        """Assign ``value`` to ``interactions`` via ``_set`` and return its result."""
        return self._set(interactions=value)

    def setInteractionPairs(self, value):
        """Type-check ``value`` and assign it to ``interactionPairs`` via ``_set``.

        The check is delegated to ``assert_is_type(value, None, [(str, str)])``;
        presumably this accepts ``None`` or a list of ``(str, str)`` tuples --
        confirm against the ``assert_is_type`` helper's documentation.
        """
        # This param has no type converter, so validation happens here.
        assert_is_type(value, None, [(str, str)])
        return self._set(interactionPairs=value)

    def setEarlyStopping(self, value):
        """Assign ``value`` to ``earlyStopping`` via ``_set``."""
        return self._set(earlyStopping=value)

    def setBalanceClasses(self, value):
        """Assign ``value`` to ``balanceClasses`` via ``_set``."""
        return self._set(balanceClasses=value)

    def setQuantileAlpha(self, value):
        """Assign ``value`` to ``quantileAlpha`` via ``_set``."""
        return self._set(quantileAlpha=value)

    def setStoppingMetric(self, value):
        """Assign ``value`` to ``stoppingMetric`` via ``_set``."""
        return self._set(stoppingMetric=value)

    def setStoppingTolerance(self, value):
        """Assign ``value`` to ``stoppingTolerance`` via ``_set``."""
        return self._set(stoppingTolerance=value)

    def setStoppingRounds(self, value):
        """Assign ``value`` to ``stoppingRounds`` via ``_set``."""
        return self._set(stoppingRounds=value)

    def setCategoricalEncoding(self, value):
        """Assign ``value`` to ``categoricalEncoding`` via ``_set``."""
        return self._set(categoricalEncoding=value)

    def setExportCheckpointsDir(self, value):
        """Assign ``value`` to ``exportCheckpointsDir`` via ``_set``."""
        return self._set(exportCheckpointsDir=value)

    def setIgnoredCols(self, value):
        """Assign ``value`` to ``ignoredCols`` via ``_set`` and return its result."""
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        """Assign ``value`` to ``ignoreConstCols`` via ``_set``."""
        return self._set(ignoreConstCols=value)

    def setClassSamplingFactors(self, value):
        """Assign ``value`` to ``classSamplingFactors`` via ``_set``."""
        return self._set(classSamplingFactors=value)

    def setMaxAfterBalanceSize(self, value):
        """Assign ``value`` to ``maxAfterBalanceSize`` via ``_set``."""
        return self._set(maxAfterBalanceSize=value)

    def setHGLM(self, value):
        """Assign ``value`` to ``HGLM`` via ``_set`` and return its result."""
        return self._set(HGLM=value)

    def setCustomDistributionFunc(self, value):
        """Assign ``value`` to ``customDistributionFunc`` via ``_set``."""
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        """Assign ``value`` to ``customMetricFunc`` via ``_set``."""
        return self._set(customMetricFunc=value)

    def setStartval(self, value):
        """Assign ``value`` to ``startval`` via ``_set`` and return its result."""
        return self._set(startval=value)

    def setKeepCrossValidationModels(self, value):
        """Assign ``value`` to ``keepCrossValidationModels`` via ``_set``."""
        return self._set(keepCrossValidationModels=value)

    def setTheta(self, value):
        """Assign ``value`` to ``theta`` via ``_set`` and return its result."""
        return self._set(theta=value)

    def setScoreEachIteration(self, value):
        """Assign ``value`` to ``scoreEachIteration`` via ``_set``."""
        return self._set(scoreEachIteration=value)

    def setTweediePower(self, value):
        """Assign ``value`` to ``tweediePower`` via ``_set`` and return its result."""
        return self._set(tweediePower=value)

    def setHuberAlpha(self, value):
        """Assign ``value`` to ``huberAlpha`` via ``_set`` and return its result."""
        return self._set(huberAlpha=value)

    def setMaxActivePredictors(self, value):
        """Assign ``value`` to ``maxActivePredictors`` via ``_set``."""
        return self._set(maxActivePredictors=value)

    def setFoldAssignment(self, value):
        """Assign ``value`` to ``foldAssignment`` via ``_set``."""
        return self._set(foldAssignment=value)

    def setCalcLike(self, value):
        """Assign ``value`` to ``calcLike`` via ``_set`` and return its result."""
        return self._set(calcLike=value)

    def setMaxRuntimeSecs(self, value):
        """Assign ``value`` to ``maxRuntimeSecs`` via ``_set``."""
        return self._set(maxRuntimeSecs=value)

    def setMaxCategoricalLevels(self, value):
        """Assign ``value`` to ``maxCategoricalLevels`` via ``_set``."""
        return self._set(maxCategoricalLevels=value)