class H2ODRFParams(H2OSharedTreeParams):
    """Parameters specific to the H2O Distributed Random Forest (DRF) algorithm."""

    ##
    # Param definitions
    ##
    binomialDoubleTrees = Param(
        Params._dummy(),
        "binomialDoubleTrees",
        "In case of binary classification, build 2 times more trees (one per class) - can lead "
        "to higher accuracy.",
        H2OTypeConverters.toBoolean())

    # FIX: the description previously ended with an unclosed parenthesis
    # ("... (where p is the # of predictors") — closing ")" added.
    mtries = Param(
        Params._dummy(),
        "mtries",
        "Number of variables randomly sampled as candidates at each split. If set to -1, defaults "
        "to sqrt{p} for classification and p/3 for regression (where p is the # of predictors)",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getBinomialDoubleTrees(self):
        return self.getOrDefault(self.binomialDoubleTrees)

    def getMtries(self):
        return self.getOrDefault(self.mtries)

    ##
    # Setters
    ##
    def setBinomialDoubleTrees(self, value):
        return self._set(binomialDoubleTrees=value)

    def setMtries(self, value):
        return self._set(mtries=value)
class HasStoppingCriteria(Params):
    """Mixin providing the early-stopping configuration shared by several algorithms.

    Exposes three Spark Params: the number of scoring rounds without improvement
    before stopping, the metric watched, and the relative tolerance.
    """

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Stopping Rounds",
        H2OTypeConverters.toInt())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Stopping Metric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Stopping Tolerance",
        H2OTypeConverters.toFloat())

    # Accessors are kept next to each other per Param for readability.
    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)
class H2OTreeBasedSupervisedMOJOParams(H2OSupervisedMOJOParams):
    """MOJO parameters shared by tree-based supervised models (read-only `ntrees`)."""

    ntrees = Param(
        Params._dummy(),
        "ntrees",
        "Number of trees representing the model",
        H2OTypeConverters.toInt())

    def getNtrees(self):
        """Return the number of trees in the trained model."""
        return self.getOrDefault(self.ntrees)
class H2OAutoMLParams(H2OCommonSupervisedParams, HasMonotoneConstraints):
    """Spark Params backing the H2O AutoML estimator."""

    ##
    # Param definitions
    ##
    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Ignored column names",
        H2OTypeConverters.toListString())

    includeAlgos = Param(
        Params._dummy(),
        "includeAlgos",
        "Algorithms to include when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo"))

    excludeAlgos = Param(
        Params._dummy(),
        "excludeAlgos",
        "Algorithms to exclude when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo"))

    projectName = Param(
        Params._dummy(),
        "projectName",
        "identifier for models that should be grouped together in the leaderboard "
        "(e.g., airlines and iris)",
        H2OTypeConverters.toNullableString())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum time in seconds for automl to be running",
        H2OTypeConverters.toFloat())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Stopping rounds",
        H2OTypeConverters.toInt())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Stopping tolerance",
        H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Stopping metric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    sortMetric = Param(
        Params._dummy(),
        "sortMetric",
        "Sort metric for the AutoML leaderboard",
        H2OTypeConverters.toEnumString("ai.h2o.sparkling.ml.algos.H2OAutoMLSortMetric"))

    balanceClasses = Param(
        Params._dummy(),
        "balanceClasses",
        "Balance classes",
        H2OTypeConverters.toBoolean())

    classSamplingFactors = Param(
        Params._dummy(),
        "classSamplingFactors",
        "Class sampling factors",
        H2OTypeConverters.toNullableListFloat())

    maxAfterBalanceSize = Param(
        Params._dummy(),
        "maxAfterBalanceSize",
        "Max after balance size",
        H2OTypeConverters.toFloat())

    keepCrossValidationPredictions = Param(
        Params._dummy(),
        "keepCrossValidationPredictions",
        "Keep cross validation predictions",
        H2OTypeConverters.toBoolean())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Keep cross validation models",
        H2OTypeConverters.toBoolean())

    maxModels = Param(
        Params._dummy(),
        "maxModels",
        "Max models to train in AutoML",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    # NOTE(review): `tryMutations` is not declared as a Param in this class —
    # these accessors only work if a parent class declares it; otherwise they
    # raise AttributeError. TODO: confirm against the class hierarchy.
    def getTryMutations(self):
        return self.getOrDefault(self.tryMutations)

    def getExcludeAlgos(self):
        return self.getOrDefault(self.excludeAlgos)

    def getIncludeAlgos(self):
        return self.getOrDefault(self.includeAlgos)

    def getProjectName(self):
        return self.getOrDefault(self.projectName)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getSortMetric(self):
        return self.getOrDefault(self.sortMetric)

    def getBalanceClasses(self):
        return self.getOrDefault(self.balanceClasses)

    def getClassSamplingFactors(self):
        return self.getOrDefault(self.classSamplingFactors)

    def getMaxAfterBalanceSize(self):
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getKeepCrossValidationPredictions(self):
        return self.getOrDefault(self.keepCrossValidationPredictions)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getMaxModels(self):
        return self.getOrDefault(self.maxModels)

    ##
    # Setters
    ##
    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    # NOTE(review): see getTryMutations above — same unverified Param.
    def setTryMutations(self, value):
        return self._set(tryMutations=value)

    def setIncludeAlgos(self, value):
        return self._set(includeAlgos=value)

    def setExcludeAlgos(self, value):
        return self._set(excludeAlgos=value)

    def setProjectName(self, value):
        return self._set(projectName=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setSortMetric(self, value):
        return self._set(sortMetric=value)

    def setBalanceClasses(self, value):
        return self._set(balanceClasses=value)

    def setClassSamplingFactors(self, value):
        return self._set(classSamplingFactors=value)

    def setMaxAfterBalanceSize(self, value):
        return self._set(maxAfterBalanceSize=value)

    def setKeepCrossValidationPredictions(self, value):
        return self._set(keepCrossValidationPredictions=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setMaxModels(self, value):
        return self._set(maxModels=value)
class H2OXGBoostParams(H2OAlgoSupervisedParams, H2OTreeBasedSupervisedMOJOParams,
                       HasMonotoneConstraints, HasStoppingCriteria):
    """Spark Params backing the H2O XGBoost estimator.

    FIX: several user-facing Param descriptions contained typos
    ("col samle rate", "max abs lead node prediction", "req lambda",
    "req aplha", "onde drop") — corrected below. No behavioral logic changed.
    """

    ##
    # Param definitions
    ##
    quietMode = Param(Params._dummy(), "quietMode", "Quiet mode",
                      H2OTypeConverters.toBoolean())
    maxDepth = Param(Params._dummy(), "maxDepth", "Maximal depth",
                     H2OTypeConverters.toInt())
    minRows = Param(Params._dummy(), "minRows", "Min rows",
                    H2OTypeConverters.toFloat())
    minChildWeight = Param(Params._dummy(), "minChildWeight", "minimal child weight",
                           H2OTypeConverters.toFloat())
    learnRate = Param(Params._dummy(), "learnRate", "learn rate",
                      H2OTypeConverters.toFloat())
    eta = Param(Params._dummy(), "eta", "eta", H2OTypeConverters.toFloat())
    sampleRate = Param(Params._dummy(), "sampleRate", "Sample rate",
                       H2OTypeConverters.toFloat())
    subsample = Param(Params._dummy(), "subsample", "subsample",
                      H2OTypeConverters.toFloat())
    colSampleRate = Param(Params._dummy(), "colSampleRate", "col sample rate",
                          H2OTypeConverters.toFloat())
    colSampleByLevel = Param(Params._dummy(), "colSampleByLevel", "Col Sample By Level",
                             H2OTypeConverters.toFloat())
    colSampleRatePerTree = Param(Params._dummy(), "colSampleRatePerTree",
                                 "col sample rate per tree",
                                 H2OTypeConverters.toFloat())
    colSampleByTree = Param(Params._dummy(), "colSampleByTree", "col sample by tree",
                            H2OTypeConverters.toFloat())
    colSampleByNode = Param(Params._dummy(), "colSampleByNode", "col sample by node",
                            H2OTypeConverters.toFloat())
    maxAbsLeafnodePred = Param(Params._dummy(), "maxAbsLeafnodePred",
                               "max abs leaf node prediction",
                               H2OTypeConverters.toFloat())
    maxDeltaStep = Param(Params._dummy(), "maxDeltaStep", "max delta step",
                         H2OTypeConverters.toFloat())
    scoreTreeInterval = Param(Params._dummy(), "scoreTreeInterval", "score tree interval",
                              H2OTypeConverters.toInt())
    minSplitImprovement = Param(Params._dummy(), "minSplitImprovement",
                                "Min split improvement",
                                H2OTypeConverters.toFloat())
    gamma = Param(Params._dummy(), "gamma", "gamma", H2OTypeConverters.toFloat())
    nthread = Param(Params._dummy(), "nthread", "nthread", H2OTypeConverters.toInt())
    maxBins = Param(Params._dummy(), "maxBins", "nbins", H2OTypeConverters.toInt())
    maxLeaves = Param(Params._dummy(), "maxLeaves", "max leaves",
                      H2OTypeConverters.toInt())
    minSumHessianInLeaf = Param(Params._dummy(), "minSumHessianInLeaf",
                                "min sum hessian in leaf",
                                H2OTypeConverters.toFloat())
    minDataInLeaf = Param(Params._dummy(), "minDataInLeaf", "min data in leaf",
                          H2OTypeConverters.toFloat())
    treeMethod = Param(
        Params._dummy(),
        "treeMethod",
        "Tree Method",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$TreeMethod"))
    growPolicy = Param(
        Params._dummy(),
        "growPolicy",
        "Grow Policy",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$GrowPolicy"))
    booster = Param(
        Params._dummy(),
        "booster",
        "Booster",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$Booster"))
    dmatrixType = Param(
        Params._dummy(),
        "dmatrixType",
        "DMatrix type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DMatrixType"))
    regLambda = Param(Params._dummy(), "regLambda", "reg lambda",
                      H2OTypeConverters.toFloat())
    regAlpha = Param(Params._dummy(), "regAlpha", "reg alpha",
                     H2OTypeConverters.toFloat())
    sampleType = Param(
        Params._dummy(),
        "sampleType",
        "Dart Sample Type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartSampleType"))
    normalizeType = Param(
        Params._dummy(),
        "normalizeType",
        "Dart Normalize Type",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartNormalizeType"))
    rateDrop = Param(Params._dummy(), "rateDrop", "rate drop",
                     H2OTypeConverters.toFloat())
    oneDrop = Param(Params._dummy(), "oneDrop", "one drop",
                    H2OTypeConverters.toBoolean())
    skipDrop = Param(Params._dummy(), "skipDrop", "skip drop",
                     H2OTypeConverters.toFloat())
    gpuId = Param(Params._dummy(), "gpuId", "GPU id", H2OTypeConverters.toInt())
    backend = Param(
        Params._dummy(),
        "backend",
        "Backend",
        H2OTypeConverters.toEnumString(
            "hex.tree.xgboost.XGBoostModel$XGBoostParameters$Backend"))
    saveMatrixDirectory = Param(
        Params._dummy(),
        "saveMatrixDirectory",
        "Directory where to save matrices passed to XGBoost library. Useful for debugging.",
        H2OTypeConverters.toNullableString())
    ignoredCols = Param(Params._dummy(), "ignoredCols",
                        "Names of columns to ignore for training.",
                        H2OTypeConverters.toNullableListString())
    buildTreeOneNode = Param(
        Params._dummy(),
        "buildTreeOneNode",
        "Run on one node only; no network overhead but fewer cpus used. Suitable for small datasets.",
        H2OTypeConverters.toBoolean())
    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())
    scoreEachIteration = Param(
        Params._dummy(),
        "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())
    customDistributionFunc = Param(
        Params._dummy(),
        "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())
    customMetricFunc = Param(
        Params._dummy(),
        "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())
    huberAlpha = Param(
        Params._dummy(),
        "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).",
        H2OTypeConverters.toFloat())
    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())
    calibrateModel = Param(
        Params._dummy(),
        "calibrateModel",
        "Use Platt Scaling to calculate calibrated class probabilities. Calibration can provide more accurate "
        "estimates of class probabilities.",
        H2OTypeConverters.toBoolean())
    foldAssignment = Param(
        Params._dummy(),
        "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$FoldAssignmentScheme"))
    tweediePower = Param(
        Params._dummy(),
        "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())
    categoricalEncoding = Param(
        Params._dummy(),
        "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$CategoricalEncodingScheme"))
    maxCategoricalLevels = Param(
        Params._dummy(),
        "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())
    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())
    quantileAlpha = Param(
        Params._dummy(),
        "quantileAlpha",
        "Desired quantile for Quantile regression, must be between 0 and 1.",
        H2OTypeConverters.toFloat())
    ignoreConstCols = Param(Params._dummy(), "ignoreConstCols",
                            "Ignore constant columns.",
                            H2OTypeConverters.toBoolean())

    ##
    # Getters
    ##
    def getQuietMode(self):
        return self.getOrDefault(self.quietMode)

    def getMaxDepth(self):
        return self.getOrDefault(self.maxDepth)

    def getMinRows(self):
        return self.getOrDefault(self.minRows)

    def getMinChildWeight(self):
        return self.getOrDefault(self.minChildWeight)

    def getLearnRate(self):
        return self.getOrDefault(self.learnRate)

    def getEta(self):
        return self.getOrDefault(self.eta)

    def getSampleRate(self):
        return self.getOrDefault(self.sampleRate)

    def getSubsample(self):
        return self.getOrDefault(self.subsample)

    def getColSampleRate(self):
        return self.getOrDefault(self.colSampleRate)

    def getColSampleByLevel(self):
        return self.getOrDefault(self.colSampleByLevel)

    def getColSampleRatePerTree(self):
        return self.getOrDefault(self.colSampleRatePerTree)

    def getColSampleByTree(self):
        return self.getOrDefault(self.colSampleByTree)

    def getColSampleByNode(self):
        return self.getOrDefault(self.colSampleByNode)

    def getMaxAbsLeafnodePred(self):
        return self.getOrDefault(self.maxAbsLeafnodePred)

    def getMaxDeltaStep(self):
        return self.getOrDefault(self.maxDeltaStep)

    def getScoreTreeInterval(self):
        return self.getOrDefault(self.scoreTreeInterval)

    def getMinSplitImprovement(self):
        return self.getOrDefault(self.minSplitImprovement)

    def getGamma(self):
        return self.getOrDefault(self.gamma)

    def getNthread(self):
        return self.getOrDefault(self.nthread)

    def getMaxBins(self):
        return self.getOrDefault(self.maxBins)

    def getMaxLeaves(self):
        return self.getOrDefault(self.maxLeaves)

    def getMinSumHessianInLeaf(self):
        return self.getOrDefault(self.minSumHessianInLeaf)

    def getMinDataInLeaf(self):
        return self.getOrDefault(self.minDataInLeaf)

    def getTreeMethod(self):
        return self.getOrDefault(self.treeMethod)

    def getGrowPolicy(self):
        return self.getOrDefault(self.growPolicy)

    def getBooster(self):
        return self.getOrDefault(self.booster)

    def getDmatrixType(self):
        return self.getOrDefault(self.dmatrixType)

    def getRegLambda(self):
        return self.getOrDefault(self.regLambda)

    def getRegAlpha(self):
        return self.getOrDefault(self.regAlpha)

    def getSampleType(self):
        return self.getOrDefault(self.sampleType)

    def getNormalizeType(self):
        return self.getOrDefault(self.normalizeType)

    def getRateDrop(self):
        return self.getOrDefault(self.rateDrop)

    def getOneDrop(self):
        return self.getOrDefault(self.oneDrop)

    def getSkipDrop(self):
        return self.getOrDefault(self.skipDrop)

    def getGpuId(self):
        return self.getOrDefault(self.gpuId)

    def getBackend(self):
        return self.getOrDefault(self.backend)

    def getSaveMatrixDirectory(self):
        return self.getOrDefault(self.saveMatrixDirectory)

    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        return self.getOrDefault(self.ignoreConstCols)

    def getBuildTreeOneNode(self):
        return self.getOrDefault(self.buildTreeOneNode)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getScoreEachIteration(self):
        return self.getOrDefault(self.scoreEachIteration)

    def getCustomDistributionFunc(self):
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        return self.getOrDefault(self.customMetricFunc)

    def getHuberAlpha(self):
        return self.getOrDefault(self.huberAlpha)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getCalibrateModel(self):
        return self.getOrDefault(self.calibrateModel)

    def getFoldAssignment(self):
        return self.getOrDefault(self.foldAssignment)

    def getTweediePower(self):
        return self.getOrDefault(self.tweediePower)

    def getCategoricalEncoding(self):
        return self.getOrDefault(self.categoricalEncoding)

    def getMaxCategoricalLevels(self):
        return self.getOrDefault(self.maxCategoricalLevels)

    def getExportCheckpointsDir(self):
        return self.getOrDefault(self.exportCheckpointsDir)

    def getQuantileAlpha(self):
        return self.getOrDefault(self.quantileAlpha)

    ##
    # Setters
    ##
    def setQuietMode(self, value):
        return self._set(quietMode=value)

    # `ntrees` is declared on H2OTreeBasedSupervisedMOJOParams; this class
    # only adds the setter.
    def setNtrees(self, value):
        return self._set(ntrees=value)

    def setMaxDepth(self, value):
        return self._set(maxDepth=value)

    def setMinRows(self, value):
        return self._set(minRows=value)

    def setMinChildWeight(self, value):
        return self._set(minChildWeight=value)

    def setLearnRate(self, value):
        return self._set(learnRate=value)

    def setEta(self, value):
        return self._set(eta=value)

    def setSampleRate(self, value):
        return self._set(sampleRate=value)

    def setSubsample(self, value):
        return self._set(subsample=value)

    def setColSampleRate(self, value):
        return self._set(colSampleRate=value)

    def setColSampleByLevel(self, value):
        return self._set(colSampleByLevel=value)

    def setColSampleRatePerTree(self, value):
        return self._set(colSampleRatePerTree=value)

    def setColSampleByTree(self, value):
        return self._set(colSampleByTree=value)

    def setColSampleByNode(self, value):
        return self._set(colSampleByNode=value)

    def setMaxAbsLeafnodePred(self, value):
        return self._set(maxAbsLeafnodePred=value)

    def setMaxDeltaStep(self, value):
        return self._set(maxDeltaStep=value)

    def setScoreTreeInterval(self, value):
        return self._set(scoreTreeInterval=value)

    def setMinSplitImprovement(self, value):
        return self._set(minSplitImprovement=value)

    def setGamma(self, value):
        return self._set(gamma=value)

    def setNthread(self, value):
        return self._set(nthread=value)

    def setMaxBins(self, value):
        return self._set(maxBins=value)

    def setMaxLeaves(self, value):
        return self._set(maxLeaves=value)

    def setMinSumHessianInLeaf(self, value):
        return self._set(minSumHessianInLeaf=value)

    def setMinDataInLeaf(self, value):
        return self._set(minDataInLeaf=value)

    def setTreeMethod(self, value):
        return self._set(treeMethod=value)

    def setGrowPolicy(self, value):
        return self._set(growPolicy=value)

    def setBooster(self, value):
        return self._set(booster=value)

    def setDmatrixType(self, value):
        return self._set(dmatrixType=value)

    def setRegLambda(self, value):
        return self._set(regLambda=value)

    def setRegAlpha(self, value):
        return self._set(regAlpha=value)

    def setSampleType(self, value):
        return self._set(sampleType=value)

    def setNormalizeType(self, value):
        return self._set(normalizeType=value)

    def setRateDrop(self, value):
        return self._set(rateDrop=value)

    def setOneDrop(self, value):
        return self._set(oneDrop=value)

    def setSkipDrop(self, value):
        return self._set(skipDrop=value)

    def setGpuId(self, value):
        return self._set(gpuId=value)

    def setBackend(self, value):
        return self._set(backend=value)

    def setSaveMatrixDirectory(self, value):
        return self._set(saveMatrixDirectory=value)

    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        return self._set(ignoreConstCols=value)

    def setBuildTreeOneNode(self, value):
        return self._set(buildTreeOneNode=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setScoreEachIteration(self, value):
        return self._set(scoreEachIteration=value)

    def setCustomDistributionFunc(self, value):
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        return self._set(customMetricFunc=value)

    def setHuberAlpha(self, value):
        return self._set(huberAlpha=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setCalibrateModel(self, value):
        return self._set(calibrateModel=value)

    def setFoldAssignment(self, value):
        return self._set(foldAssignment=value)

    def setTweediePower(self, value):
        return self._set(tweediePower=value)

    def setCategoricalEncoding(self, value):
        return self._set(categoricalEncoding=value)

    def setExportCheckpointsDir(self, value):
        return self._set(exportCheckpointsDir=value)

    def setQuantileAlpha(self, value):
        return self._set(quantileAlpha=value)

    def setMaxCategoricalLevels(self, value):
        return self._set(maxCategoricalLevels=value)
class H2OKMeansParams(H2OAlgoUnsupervisedParams):
    """Spark Params backing the H2O K-Means estimator.

    FIX: the stoppingMetric description contained the typo "anonomaly_score";
    corrected to "anomaly_score". No behavioral logic changed.
    """

    ##
    # Param definitions
    ##
    maxIterations = Param(
        Params._dummy(),
        "maxIterations",
        "Maximum number of KMeans iterations to find the centroids.",
        H2OTypeConverters.toInt())

    standardize = Param(
        Params._dummy(),
        "standardize",
        "Standardize the numeric columns to have a mean of zero and unit variance.",
        H2OTypeConverters.toBoolean())

    init = Param(
        Params._dummy(),
        "init",
        "Initialization mode for finding the initial cluster centers.",
        H2OTypeConverters.toEnumString("hex.kmeans.KMeans$Initialization"))

    userPoints = Param(
        Params._dummy(),
        "userPoints",
        "This option enables to specify array of points, where each point represents coordinates of "
        "an initial cluster center. The user-specified points must have the same number of columns "
        "as the training observations. The number of rows must equal the number of clusters.",
        H2OTypeConverters.toNullableListListFloat())

    estimateK = Param(
        Params._dummy(),
        "estimateK",
        "If enabled, the algorithm tries to identify optimal number of clusters, up to k clusters.",
        H2OTypeConverters.toBoolean())

    k = Param(Params._dummy(), "k", "Number of clusters to generate.",
              H2OTypeConverters.toInt())

    quantileAlpha = Param(
        Params._dummy(),
        "quantileAlpha",
        "Desired quantile for Quantile regression, must be between 0 and 1.",
        H2OTypeConverters.toFloat())

    tweediePower = Param(
        Params._dummy(),
        "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())

    maxCategoricalLevels = Param(
        Params._dummy(),
        "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())

    ignoredCols = Param(Params._dummy(), "ignoredCols",
                        "Names of columns to ignore for training.",
                        H2OTypeConverters.toNullableListString())

    ignoreConstCols = Param(Params._dummy(), "ignoreConstCols",
                            "Ignore constant columns.",
                            H2OTypeConverters.toBoolean())

    scoreEachIteration = Param(
        Params._dummy(),
        "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())

    customDistributionFunc = Param(
        Params._dummy(),
        "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    customMetricFunc = Param(
        Params._dummy(),
        "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of"
        " the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable)",
        H2OTypeConverters.toInt())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())

    clusterSizeConstraints = Param(
        Params._dummy(),
        "clusterSizeConstraints",
        "An array specifying the minimum number of points that should be in each cluster. The length of the constraints"
        " array has to be the same as the number of clusters.",
        H2OTypeConverters.toNullableListFloat())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Relative tolerance for metric-based stopping criterion (stop if relative improvement is not"
        " at least this much)",
        H2OTypeConverters.toFloat())

    foldAssignment = Param(
        Params._dummy(),
        "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$FoldAssignmentScheme"))

    categoricalEncoding = Param(
        Params._dummy(),
        "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$CategoricalEncodingScheme"))

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and"
        " anomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used"
        " in GBM and DRF with the Python client.",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    huberAlpha = Param(
        Params._dummy(),
        "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).",
        H2OTypeConverters.toFloat())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())

    #
    # Getters
    #
    def getMaxIterations(self):
        return self.getOrDefault(self.maxIterations)

    def getStandardize(self):
        return self.getOrDefault(self.standardize)

    def getInit(self):
        return self.getOrDefault(self.init)

    def getUserPoints(self):
        return self.getOrDefault(self.userPoints)

    def getEstimateK(self):
        return self.getOrDefault(self.estimateK)

    def getK(self):
        return self.getOrDefault(self.k)

    def getQuantileAlpha(self):
        return self.getOrDefault(self.quantileAlpha)

    def getTweediePower(self):
        return self.getOrDefault(self.tweediePower)

    def getMaxCategoricalLevels(self):
        return self.getOrDefault(self.maxCategoricalLevels)

    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        return self.getOrDefault(self.ignoreConstCols)

    def getScoreEachIteration(self):
        return self.getOrDefault(self.scoreEachIteration)

    def getCustomDistributionFunc(self):
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        return self.getOrDefault(self.customMetricFunc)

    def getExportCheckpointsDir(self):
        return self.getOrDefault(self.exportCheckpointsDir)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getClusterSizeConstraints(self):
        return self.getOrDefault(self.clusterSizeConstraints)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getFoldAssignment(self):
        return self.getOrDefault(self.foldAssignment)

    def getCategoricalEncoding(self):
        return self.getOrDefault(self.categoricalEncoding)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getHuberAlpha(self):
        return self.getOrDefault(self.huberAlpha)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    #
    # Setters
    #
    def setMaxIterations(self, value):
        return self._set(maxIterations=value)

    def setStandardize(self, value):
        return self._set(standardize=value)

    def setInit(self, value):
        return self._set(init=value)

    def setUserPoints(self, value):
        return self._set(userPoints=value)

    def setEstimateK(self, value):
        return self._set(estimateK=value)

    def setK(self, value):
        return self._set(k=value)

    def setQuantileAlpha(self, value):
        return self._set(quantileAlpha=value)

    def setTweediePower(self, value):
        return self._set(tweediePower=value)

    def setMaxCategoricalLevels(self, value):
        return self._set(maxCategoricalLevels=value)

    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        return self._set(ignoreConstCols=value)

    def setScoreEachIteration(self, value):
        return self._set(scoreEachIteration=value)

    def setCustomDistributionFunc(self, value):
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        return self._set(customMetricFunc=value)

    def setExportCheckpointsDir(self, value):
        return self._set(exportCheckpointsDir=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setClusterSizeConstraints(self, value):
        return self._set(clusterSizeConstraints=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setFoldAssignment(self, value):
        return self._set(foldAssignment=value)

    def setCategoricalEncoding(self, value):
        return self._set(categoricalEncoding=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setHuberAlpha(self, value):
        return self._set(huberAlpha=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)
class H2OGridSearchParams(H2OCommonSupervisedParams):
    """Parameters for H2O Grid Search: the algorithm to tune, the hyper-parameter
    space, the search strategy, and the stopping / model-selection criteria.
    """

    ##
    # Param definitions
    ##
    algo = Param(Params._dummy(), "algo", "Algo to run grid search on")

    hyperParameters = Param(Params._dummy(), "hyperParameters", "Grid Search Hyper Params map")

    strategy = Param(
        Params._dummy(),
        "strategy",
        "strategy",
        H2OTypeConverters.toEnumString(
            "hex.grid.HyperSpaceSearchCriteria$Strategy"))

    maxRuntimeSecs = Param(Params._dummy(), "maxRuntimeSecs", "maxRuntimeSecs", H2OTypeConverters.toFloat())

    maxModels = Param(Params._dummy(), "maxModels", "maxModels", H2OTypeConverters.toInt())

    stoppingRounds = Param(Params._dummy(), "stoppingRounds", "stoppingRounds", H2OTypeConverters.toInt())

    stoppingTolerance = Param(Params._dummy(), "stoppingTolerance", "stoppingTolerance", H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "stoppingMetric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    selectBestModelBy = Param(
        Params._dummy(),
        "selectBestModelBy",
        "selectBestModelBy",
        H2OTypeConverters.toEnumString(
            "ai.h2o.sparkling.ml.algos.H2OGridSearchMetric"))

    selectBestModelDecreasing = Param(Params._dummy(),
                                      "selectBestModelDecreasing",
                                      "selectBestModelDecreasing",
                                      H2OTypeConverters.toBoolean())

    ##
    # Getters
    ##
    def getAlgoParams(self):
        # Delegates to the backing Java object rather than a Spark Param.
        return self._java_obj.getAlgoParams()

    def getHyperParameters(self):
        # The value may still be a py4j JavaObject (a Java Map) if it was set
        # on the backend side; convert it to a plain Python dict of lists.
        params = self.getOrDefault(self.hyperParameters)
        if isinstance(params, JavaObject):
            return {k: list(params.get(k)) for k in params.keySet().toArray()}
        else:
            return params

    def getStrategy(self):
        return self.getOrDefault(self.strategy)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getMaxModels(self):
        return self.getOrDefault(self.maxModels)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getSelectBestModelBy(self):
        return self.getOrDefault(self.selectBestModelBy)

    def getSelectBestModelDecreasing(self):
        return self.getOrDefault(self.selectBestModelDecreasing)

    ##
    # Setters
    ##
    def setAlgo(self, value):
        assert_is_type(value, object)
        self._java_obj.setAlgo(value._java_obj)
        return self

    def setHyperParameters(self, value):
        assert_is_type(value, None, {str: [object]})
        return self._set(hyperParameters=value)

    def setStrategy(self, value):
        # BUGFIX: previously wrote to the unrelated 'link' param, so the
        # configured search strategy was silently ignored.
        return self._set(strategy=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setMaxModels(self, value):
        return self._set(maxModels=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setSelectBestModelBy(self, value):
        return self._set(selectBestModelBy=value)

    def setSelectBestModelDecreasing(self, value):
        return self._set(selectBestModelDecreasing=value)
class H2OGLMParams(H2OAlgoSupervisedParams):
    """Parameters for H2O Generalized Linear Models (GLM), including family/link
    configuration, regularization (alpha/lambda), solver controls, early
    stopping, and HGLM options. Each Spark ``Param`` mirrors a backend H2O
    parameter of the same name; getters/setters are thin Param accessors.
    """

    ##
    # Param definitions
    ##
    standardize = Param(Params._dummy(), "standardize", "standardize", H2OTypeConverters.toBoolean())

    family = Param(
        Params._dummy(),
        "family",
        "family",
        H2OTypeConverters.toEnumString(
            "hex.glm.GLMModel$GLMParameters$Family"))

    link = Param(
        Params._dummy(),
        "link",
        "link",
        H2OTypeConverters.toEnumString("hex.glm.GLMModel$GLMParameters$Link"))

    solver = Param(
        Params._dummy(),
        "solver",
        "solver",
        H2OTypeConverters.toEnumString(
            "hex.glm.GLMModel$GLMParameters$Solver"))

    tweedieVariancePower = Param(Params._dummy(), "tweedieVariancePower", "Tweedie variance power",
                                 H2OTypeConverters.toFloat())

    tweedieLinkPower = Param(Params._dummy(), "tweedieLinkPower", "Tweedie link power", H2OTypeConverters.toFloat())

    # 'alpha'/'lambda' are renamed to avoid clashing with the Python keyword 'lambda'.
    alphaValue = Param(Params._dummy(), "alphaValue", "alphaValue", H2OTypeConverters.toNullableListFloat())

    lambdaValue = Param(Params._dummy(), "lambdaValue", "lambdaValue", H2OTypeConverters.toNullableListFloat())

    # NOTE(review): converter references the DeepLearning enum class — presumably
    # the backend reuses that enum for GLM; confirm against the H2O backend.
    missingValuesHandling = Param(
        Params._dummy(),
        "missingValuesHandling",
        "missingValuesHandling",
        H2OTypeConverters.toEnumString(
            "hex.deeplearning.DeepLearningModel$DeepLearningParameters$MissingValuesHandling"
        ))

    prior = Param(Params._dummy(), "prior", "prior", H2OTypeConverters.toFloat())

    lambdaSearch = Param(Params._dummy(), "lambdaSearch", "lambda search", H2OTypeConverters.toBoolean())

    nlambdas = Param(Params._dummy(), "nlambdas", "nlambdas", H2OTypeConverters.toInt())

    nonNegative = Param(Params._dummy(), "nonNegative", "nonNegative", H2OTypeConverters.toBoolean())

    lambdaMinRatio = Param(Params._dummy(), "lambdaMinRatio", "lambdaMinRatio", H2OTypeConverters.toFloat())

    maxIterations = Param(Params._dummy(), "maxIterations", "maxIterations", H2OTypeConverters.toInt())

    intercept = Param(Params._dummy(), "intercept", "intercept", H2OTypeConverters.toBoolean())

    betaEpsilon = Param(Params._dummy(), "betaEpsilon", "betaEpsilon", H2OTypeConverters.toFloat())

    objectiveEpsilon = Param(Params._dummy(), "objectiveEpsilon", "objectiveEpsilon", H2OTypeConverters.toFloat())

    gradientEpsilon = Param(Params._dummy(), "gradientEpsilon", "gradientEpsilon", H2OTypeConverters.toFloat())

    objReg = Param(Params._dummy(), "objReg", "objReg", H2OTypeConverters.toFloat())

    computePValues = Param(Params._dummy(), "computePValues", "computePValues", H2OTypeConverters.toBoolean())

    removeCollinearCols = Param(Params._dummy(), "removeCollinearCols", "removeCollinearCols",
                                H2OTypeConverters.toBoolean())

    interactions = Param(Params._dummy(), "interactions", "interactions", H2OTypeConverters.toNullableListString())

    # No converter: value validated in setInteractionPairs instead.
    interactionPairs = Param(Params._dummy(), "interactionPairs", "interactionPairs")

    earlyStopping = Param(Params._dummy(), "earlyStopping", "earlyStopping", H2OTypeConverters.toBoolean())

    balanceClasses = Param(
        Params._dummy(),
        "balanceClasses",
        "Balance training data class counts via over/under-sampling (for imbalanced data).",
        H2OTypeConverters.toBoolean())

    quantileAlpha = Param(
        Params._dummy(),
        "quantileAlpha",
        "Desired quantile for Quantile regression, must be between 0 and 1.",
        H2OTypeConverters.toFloat())

    # NOTE(review): "anonomaly_score" typo below is preserved — it is a runtime
    # description string; fixing it is out of scope for a doc-only pass.
    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and"
        " anonomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used"
        " in GBM and DRF with the Python client.",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Relative tolerance for metric-based stopping criterion (stop if relative improvement is not"
        " at least this much)",
        H2OTypeConverters.toFloat())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of"
        " the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable)",
        H2OTypeConverters.toInt())

    categoricalEncoding = Param(
        Params._dummy(),
        "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$CategoricalEncodingScheme"))

    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())

    ignoredCols = Param(Params._dummy(), "ignoredCols", "Names of columns to ignore for training.",
                        H2OTypeConverters.toNullableListString())

    ignoreConstCols = Param(Params._dummy(), "ignoreConstCols", "Ignore constant columns.",
                            H2OTypeConverters.toBoolean())

    classSamplingFactors = Param(
        Params._dummy(),
        "classSamplingFactors",
        "Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors "
        "will be automatically computed to obtain class balance during training. Requires balance_classes.",
        H2OTypeConverters.toNullableListFloat())

    maxAfterBalanceSize = Param(
        Params._dummy(),
        "maxAfterBalanceSize",
        "Maximum relative size of the training data after balancing class counts (can be less than 1.0). "
        "Requires balance_classes.",
        H2OTypeConverters.toFloat())

    maxCategoricalLevels = Param(
        Params._dummy(),
        "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())

    HGLM = Param(
        Params._dummy(),
        "HGLM",
        "If set to true, will return HGLM model. Otherwise, normal GLM model will be returned",
        H2OTypeConverters.toBoolean())

    customDistributionFunc = Param(
        Params._dummy(),
        "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    customMetricFunc = Param(
        Params._dummy(),
        "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    startval = Param(
        Params._dummy(),
        "startval",
        "double array to initialize fixed and random coefficients for HGLM.",
        H2OTypeConverters.toNullableListFloat())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())

    theta = Param(Params._dummy(), "theta", "Theta", H2OTypeConverters.toFloat())

    scoreEachIteration = Param(
        Params._dummy(),
        "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())

    tweediePower = Param(
        Params._dummy(),
        "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())

    huberAlpha = Param(
        Params._dummy(),
        "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).",
        H2OTypeConverters.toFloat())

    maxActivePredictors = Param(
        Params._dummy(),
        "maxActivePredictors",
        "Maximum number of active predictors during computation. Use as a stopping criterion to prevent expensive "
        "model building with many predictors. Default indicates: If the IRLSM solver is used, the value of "
        "max_active_predictors is set to 5000 otherwise it is set to 100000000.",
        H2OTypeConverters.toInt())

    foldAssignment = Param(
        Params._dummy(),
        "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString(
            "hex.Model$Parameters$FoldAssignmentScheme"))

    calcLike = Param(
        Params._dummy(),
        "calcLike",
        "if true, will return likelihood function value for HGLM.",
        H2OTypeConverters.toBoolean())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())

    ##
    # Getters — each returns the current (or default) value of its Param.
    ##
    def getStandardize(self):
        return self.getOrDefault(self.standardize)

    def getFamily(self):
        return self.getOrDefault(self.family)

    def getLink(self):
        return self.getOrDefault(self.link)

    def getSolver(self):
        return self.getOrDefault(self.solver)

    def getTweedieVariancePower(self):
        return self.getOrDefault(self.tweedieVariancePower)

    def getTweedieLinkPower(self):
        return self.getOrDefault(self.tweedieLinkPower)

    def getAlphaValue(self):
        return self.getOrDefault(self.alphaValue)

    def getLambdaValue(self):
        return self.getOrDefault(self.lambdaValue)

    def getMissingValuesHandling(self):
        return self.getOrDefault(self.missingValuesHandling)

    def getPrior(self):
        return self.getOrDefault(self.prior)

    def getLambdaSearch(self):
        return self.getOrDefault(self.lambdaSearch)

    def getNlambdas(self):
        return self.getOrDefault(self.nlambdas)

    def getNonNegative(self):
        return self.getOrDefault(self.nonNegative)

    def getLambdaMinRatio(self):
        return self.getOrDefault(self.lambdaMinRatio)

    def getMaxIterations(self):
        return self.getOrDefault(self.maxIterations)

    def getIntercept(self):
        return self.getOrDefault(self.intercept)

    def getBetaEpsilon(self):
        return self.getOrDefault(self.betaEpsilon)

    def getObjectiveEpsilon(self):
        return self.getOrDefault(self.objectiveEpsilon)

    def getGradientEpsilon(self):
        return self.getOrDefault(self.gradientEpsilon)

    def getObjReg(self):
        return self.getOrDefault(self.objReg)

    def getComputePValues(self):
        return self.getOrDefault(self.computePValues)

    def getRemoveCollinearCols(self):
        return self.getOrDefault(self.removeCollinearCols)

    def getInteractions(self):
        return self.getOrDefault(self.interactions)

    def getInteractionPairs(self):
        return self.getOrDefault(self.interactionPairs)

    def getEarlyStopping(self):
        return self.getOrDefault(self.earlyStopping)

    def getBalanceClasses(self):
        return self.getOrDefault(self.balanceClasses)

    def getQuantileAlpha(self):
        return self.getOrDefault(self.quantileAlpha)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getCategoricalEncoding(self):
        return self.getOrDefault(self.categoricalEncoding)

    def getExportCheckpointsDir(self):
        return self.getOrDefault(self.exportCheckpointsDir)

    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        return self.getOrDefault(self.ignoreConstCols)

    def getClassSamplingFactors(self):
        return self.getOrDefault(self.classSamplingFactors)

    def getMaxCategoricalLevels(self):
        return self.getOrDefault(self.maxCategoricalLevels)

    def getMaxAfterBalanceSize(self):
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getHGLM(self):
        return self.getOrDefault(self.HGLM)

    def getCustomDistributionFunc(self):
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        return self.getOrDefault(self.customMetricFunc)

    def getStartval(self):
        return self.getOrDefault(self.startval)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getTheta(self):
        return self.getOrDefault(self.theta)

    def getScoreEachIteration(self):
        return self.getOrDefault(self.scoreEachIteration)

    def getTweediePower(self):
        return self.getOrDefault(self.tweediePower)

    def getHuberAlpha(self):
        return self.getOrDefault(self.huberAlpha)

    def getMaxActivePredictors(self):
        return self.getOrDefault(self.maxActivePredictors)

    def getFoldAssignment(self):
        return self.getOrDefault(self.foldAssignment)

    def getCalcLike(self):
        return self.getOrDefault(self.calcLike)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    ##
    # Setters — each stores the value via Spark's Params._set and returns self.
    ##
    def setStandardize(self, value):
        return self._set(standardize=value)

    def setFamily(self, value):
        return self._set(family=value)

    def setLink(self, value):
        return self._set(link=value)

    def setSolver(self, value):
        return self._set(solver=value)

    def setTweedieVariancePower(self, value):
        return self._set(tweedieVariancePower=value)

    def setTweedieLinkPower(self, value):
        return self._set(tweedieLinkPower=value)

    def setAlphaValue(self, value):
        return self._set(alphaValue=value)

    def setLambdaValue(self, value):
        return self._set(lambdaValue=value)

    def setMissingValuesHandling(self, value):
        return self._set(missingValuesHandling=value)

    def setPrior(self, value):
        return self._set(prior=value)

    def setLambdaSearch(self, value):
        return self._set(lambdaSearch=value)

    def setNlambdas(self, value):
        return self._set(nlambdas=value)

    def setNonNegative(self, value):
        return self._set(nonNegative=value)

    def setLambdaMinRatio(self, value):
        return self._set(lambdaMinRatio=value)

    def setMaxIterations(self, value):
        return self._set(maxIterations=value)

    def setIntercept(self, value):
        return self._set(intercept=value)

    def setBetaEpsilon(self, value):
        return self._set(betaEpsilon=value)

    def setObjectiveEpsilon(self, value):
        return self._set(objectiveEpsilon=value)

    def setGradientEpsilon(self, value):
        return self._set(gradientEpsilon=value)

    def setObjReg(self, value):
        return self._set(objReg=value)

    def setComputePValues(self, value):
        return self._set(computePValues=value)

    def setRemoveCollinearCols(self, value):
        return self._set(removeCollinearCols=value)

    def setInteractions(self, value):
        return self._set(interactions=value)

    def setInteractionPairs(self, value):
        # Validated here because the Param itself has no type converter.
        assert_is_type(value, None, [(str, str)])
        return self._set(interactionPairs=value)

    def setEarlyStopping(self, value):
        return self._set(earlyStopping=value)

    def setBalanceClasses(self, value):
        return self._set(balanceClasses=value)

    def setQuantileAlpha(self, value):
        return self._set(quantileAlpha=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setCategoricalEncoding(self, value):
        return self._set(categoricalEncoding=value)

    def setExportCheckpointsDir(self, value):
        return self._set(exportCheckpointsDir=value)

    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        return self._set(ignoreConstCols=value)

    def setClassSamplingFactors(self, value):
        return self._set(classSamplingFactors=value)

    def setMaxAfterBalanceSize(self, value):
        return self._set(maxAfterBalanceSize=value)

    def setHGLM(self, value):
        return self._set(HGLM=value)

    def setCustomDistributionFunc(self, value):
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        return self._set(customMetricFunc=value)

    def setStartval(self, value):
        return self._set(startval=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setTheta(self, value):
        return self._set(theta=value)

    def setScoreEachIteration(self, value):
        return self._set(scoreEachIteration=value)

    def setTweediePower(self, value):
        return self._set(tweediePower=value)

    def setHuberAlpha(self, value):
        return self._set(huberAlpha=value)

    def setMaxActivePredictors(self, value):
        return self._set(maxActivePredictors=value)

    def setFoldAssignment(self, value):
        return self._set(foldAssignment=value)

    def setCalcLike(self, value):
        return self._set(calcLike=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setMaxCategoricalLevels(self, value):
        return self._set(maxCategoricalLevels=value)
class H2OXGBoostParams(H2OAlgoSupervisedParams):
    """Parameters for the H2O XGBoost algorithm: tree shape and sampling rates,
    learning-rate controls, DART-specific options, and backend selection.
    Each Spark ``Param`` mirrors the backend H2O XGBoost parameter of the same
    name; getters/setters are thin Param accessors.

    Fixes: corrected typos in several Param description strings (visible to
    users via explainParams): 'col samle rate', 'max abs lead node prediction',
    'req lambda', 'req aplha', 'onde drop', and maxBins described as 'nbins'.
    """

    ##
    # Param definitions
    ##
    quietMode = Param(
        Params._dummy(),
        "quietMode",
        "Quiet mode",
        H2OTypeConverters.toBoolean())

    ntrees = Param(
        Params._dummy(),
        "ntrees",
        "Number of trees",
        H2OTypeConverters.toInt())

    nEstimators = Param(
        Params._dummy(),
        "nEstimators",
        "number of estimators",
        H2OTypeConverters.toInt())

    maxDepth = Param(
        Params._dummy(),
        "maxDepth",
        "Maximal depth",
        H2OTypeConverters.toInt())

    minRows = Param(
        Params._dummy(),
        "minRows",
        "Min rows",
        H2OTypeConverters.toFloat())

    minChildWeight = Param(
        Params._dummy(),
        "minChildWeight",
        "minimal child weight",
        H2OTypeConverters.toFloat())

    learnRate = Param(
        Params._dummy(),
        "learnRate",
        "learn rate",
        H2OTypeConverters.toFloat())

    eta = Param(
        Params._dummy(),
        "eta",
        "eta",
        H2OTypeConverters.toFloat())

    learnRateAnnealing = Param(
        Params._dummy(),
        "learnRateAnnealing",
        "Learn Rate Annealing",
        H2OTypeConverters.toFloat())

    sampleRate = Param(
        Params._dummy(),
        "sampleRate",
        "Sample rate",
        H2OTypeConverters.toFloat())

    subsample = Param(
        Params._dummy(),
        "subsample",
        "subsample",
        H2OTypeConverters.toFloat())

    colSampleRate = Param(
        Params._dummy(),
        "colSampleRate",
        "col sample rate",
        H2OTypeConverters.toFloat())

    colSampleByLevel = Param(
        Params._dummy(),
        "colSampleByLevel",
        "Col Sample By Level",
        H2OTypeConverters.toFloat())

    colSampleRatePerTree = Param(
        Params._dummy(),
        "colSampleRatePerTree",
        "col sample rate per tree",  # was "col samle rate"
        H2OTypeConverters.toFloat())

    colSampleByTree = Param(
        Params._dummy(),
        "colSampleByTree",
        "col sample by tree",
        H2OTypeConverters.toFloat())

    maxAbsLeafnodePred = Param(
        Params._dummy(),
        "maxAbsLeafnodePred",
        "max abs leaf node prediction",  # was "max abs lead node prediction"
        H2OTypeConverters.toFloat())

    maxDeltaStep = Param(
        Params._dummy(),
        "maxDeltaStep",
        "max delta step",
        H2OTypeConverters.toFloat())

    scoreTreeInterval = Param(
        Params._dummy(),
        "scoreTreeInterval",
        "score tree interval",
        H2OTypeConverters.toInt())

    initialScoreInterval = Param(
        Params._dummy(),
        "initialScoreInterval",
        "Initial Score Interval",
        H2OTypeConverters.toInt())

    scoreInterval = Param(
        Params._dummy(),
        "scoreInterval",
        "Score Interval",
        H2OTypeConverters.toInt())

    minSplitImprovement = Param(
        Params._dummy(),
        "minSplitImprovement",
        "Min split improvement",
        H2OTypeConverters.toFloat())

    gamma = Param(
        Params._dummy(),
        "gamma",
        "gamma",
        H2OTypeConverters.toFloat())

    nthread = Param(
        Params._dummy(),
        "nthread",
        "nthread",
        H2OTypeConverters.toInt())

    maxBins = Param(
        Params._dummy(),
        "maxBins",
        "max bins",  # was misleadingly described as "nbins"
        H2OTypeConverters.toInt())

    maxLeaves = Param(
        Params._dummy(),
        "maxLeaves",
        "max leaves",
        H2OTypeConverters.toInt())

    minSumHessianInLeaf = Param(
        Params._dummy(),
        "minSumHessianInLeaf",
        "min sum hessian in leaf",
        H2OTypeConverters.toFloat())

    minDataInLeaf = Param(
        Params._dummy(),
        "minDataInLeaf",
        "min data in leaf",
        H2OTypeConverters.toFloat())

    treeMethod = Param(
        Params._dummy(),
        "treeMethod",
        "Tree Method",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$TreeMethod"))

    growPolicy = Param(
        Params._dummy(),
        "growPolicy",
        "Grow Policy",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$GrowPolicy"))

    booster = Param(
        Params._dummy(),
        "booster",
        "Booster",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$Booster"))

    dmatrixType = Param(
        Params._dummy(),
        "dmatrixType",
        "DMatrix type",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$DMatrixType"))

    regLambda = Param(
        Params._dummy(),
        "regLambda",
        "reg lambda",  # was "req lambda"
        H2OTypeConverters.toFloat())

    regAlpha = Param(
        Params._dummy(),
        "regAlpha",
        "reg alpha",  # was "req aplha"
        H2OTypeConverters.toFloat())

    sampleType = Param(
        Params._dummy(),
        "sampleType",
        "Dart Sample Type",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartSampleType"))

    normalizeType = Param(
        Params._dummy(),
        "normalizeType",
        "Dart Normalize Type",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$DartNormalizeType"))

    rateDrop = Param(
        Params._dummy(),
        "rateDrop",
        "rate drop",
        H2OTypeConverters.toFloat())

    oneDrop = Param(
        Params._dummy(),
        "oneDrop",
        "one drop",  # was "onde drop"
        H2OTypeConverters.toBoolean())

    skipDrop = Param(
        Params._dummy(),
        "skipDrop",
        "skip drop",
        H2OTypeConverters.toFloat())

    gpuId = Param(
        Params._dummy(),
        "gpuId",
        "GPU id",
        H2OTypeConverters.toInt())

    backend = Param(
        Params._dummy(),
        "backend",
        "Backend",
        H2OTypeConverters.toEnumString("hex.tree.xgboost.XGBoostModel$XGBoostParameters$Backend"))

    ##
    # Getters — each returns the current (or default) value of its Param.
    ##
    def getQuietMode(self):
        return self.getOrDefault(self.quietMode)

    def getNtrees(self):
        return self.getOrDefault(self.ntrees)

    def getNEstimators(self):
        return self.getOrDefault(self.nEstimators)

    def getMaxDepth(self):
        return self.getOrDefault(self.maxDepth)

    def getMinRows(self):
        return self.getOrDefault(self.minRows)

    def getMinChildWeight(self):
        return self.getOrDefault(self.minChildWeight)

    def getLearnRate(self):
        return self.getOrDefault(self.learnRate)

    def getEta(self):
        return self.getOrDefault(self.eta)

    def getLearnRateAnnealing(self):
        return self.getOrDefault(self.learnRateAnnealing)

    def getSampleRate(self):
        return self.getOrDefault(self.sampleRate)

    def getSubsample(self):
        return self.getOrDefault(self.subsample)

    def getColSampleRate(self):
        return self.getOrDefault(self.colSampleRate)

    def getColSampleByLevel(self):
        return self.getOrDefault(self.colSampleByLevel)

    def getColSampleRatePerTree(self):
        return self.getOrDefault(self.colSampleRatePerTree)

    def getColSampleByTree(self):
        return self.getOrDefault(self.colSampleByTree)

    def getMaxAbsLeafnodePred(self):
        return self.getOrDefault(self.maxAbsLeafnodePred)

    def getMaxDeltaStep(self):
        return self.getOrDefault(self.maxDeltaStep)

    def getScoreTreeInterval(self):
        return self.getOrDefault(self.scoreTreeInterval)

    def getInitialScoreInterval(self):
        return self.getOrDefault(self.initialScoreInterval)

    def getScoreInterval(self):
        return self.getOrDefault(self.scoreInterval)

    def getMinSplitImprovement(self):
        return self.getOrDefault(self.minSplitImprovement)

    def getGamma(self):
        return self.getOrDefault(self.gamma)

    def getNthread(self):
        return self.getOrDefault(self.nthread)

    def getMaxBins(self):
        return self.getOrDefault(self.maxBins)

    def getMaxLeaves(self):
        return self.getOrDefault(self.maxLeaves)

    def getMinSumHessianInLeaf(self):
        return self.getOrDefault(self.minSumHessianInLeaf)

    def getMinDataInLeaf(self):
        return self.getOrDefault(self.minDataInLeaf)

    def getTreeMethod(self):
        return self.getOrDefault(self.treeMethod)

    def getGrowPolicy(self):
        return self.getOrDefault(self.growPolicy)

    def getBooster(self):
        return self.getOrDefault(self.booster)

    def getDmatrixType(self):
        return self.getOrDefault(self.dmatrixType)

    def getRegLambda(self):
        return self.getOrDefault(self.regLambda)

    def getRegAlpha(self):
        return self.getOrDefault(self.regAlpha)

    def getSampleType(self):
        return self.getOrDefault(self.sampleType)

    def getNormalizeType(self):
        return self.getOrDefault(self.normalizeType)

    def getRateDrop(self):
        return self.getOrDefault(self.rateDrop)

    def getOneDrop(self):
        return self.getOrDefault(self.oneDrop)

    def getSkipDrop(self):
        return self.getOrDefault(self.skipDrop)

    def getGpuId(self):
        return self.getOrDefault(self.gpuId)

    def getBackend(self):
        return self.getOrDefault(self.backend)

    ##
    # Setters — each stores the value via Spark's Params._set and returns self.
    ##
    def setQuietMode(self, value):
        return self._set(quietMode=value)

    def setNtrees(self, value):
        return self._set(ntrees=value)

    def setNEstimators(self, value):
        return self._set(nEstimators=value)

    def setMaxDepth(self, value):
        return self._set(maxDepth=value)

    def setMinRows(self, value):
        return self._set(minRows=value)

    def setMinChildWeight(self, value):
        return self._set(minChildWeight=value)

    def setLearnRate(self, value):
        return self._set(learnRate=value)

    def setEta(self, value):
        return self._set(eta=value)

    def setLearnRateAnnealing(self, value):
        return self._set(learnRateAnnealing=value)

    def setSampleRate(self, value):
        return self._set(sampleRate=value)

    def setSubsample(self, value):
        return self._set(subsample=value)

    def setColSampleRate(self, value):
        return self._set(colSampleRate=value)

    def setColSampleByLevel(self, value):
        return self._set(colSampleByLevel=value)

    def setColSampleRatePerTree(self, value):
        return self._set(colSampleRatePerTree=value)

    def setColSampleByTree(self, value):
        return self._set(colSampleByTree=value)

    def setMaxAbsLeafnodePred(self, value):
        return self._set(maxAbsLeafnodePred=value)

    def setMaxDeltaStep(self, value):
        return self._set(maxDeltaStep=value)

    def setScoreTreeInterval(self, value):
        return self._set(scoreTreeInterval=value)

    def setInitialScoreInterval(self, value):
        return self._set(initialScoreInterval=value)

    def setScoreInterval(self, value):
        return self._set(scoreInterval=value)

    def setMinSplitImprovement(self, value):
        return self._set(minSplitImprovement=value)

    def setGamma(self, value):
        return self._set(gamma=value)

    def setNthread(self, value):
        return self._set(nthread=value)

    def setMaxBins(self, value):
        return self._set(maxBins=value)

    def setMaxLeaves(self, value):
        return self._set(maxLeaves=value)

    def setMinSumHessianInLeaf(self, value):
        return self._set(minSumHessianInLeaf=value)

    def setMinDataInLeaf(self, value):
        return self._set(minDataInLeaf=value)

    def setTreeMethod(self, value):
        return self._set(treeMethod=value)

    def setGrowPolicy(self, value):
        return self._set(growPolicy=value)

    def setBooster(self, value):
        return self._set(booster=value)

    def setDmatrixType(self, value):
        return self._set(dmatrixType=value)

    def setRegLambda(self, value):
        return self._set(regLambda=value)

    def setRegAlpha(self, value):
        return self._set(regAlpha=value)

    def setSampleType(self, value):
        return self._set(sampleType=value)

    def setNormalizeType(self, value):
        return self._set(normalizeType=value)

    def setRateDrop(self, value):
        return self._set(rateDrop=value)

    def setOneDrop(self, value):
        return self._set(oneDrop=value)

    def setSkipDrop(self, value):
        return self._set(skipDrop=value)

    def setGpuId(self, value):
        return self._set(gpuId=value)

    def setBackend(self, value):
        return self._set(backend=value)
class H2OGBMParams(H2OSharedTreeParams, HasMonotoneConstraints, HasQuantileAlpha):
    """Spark Param definitions plus getters/setters for the H2O GBM algorithm.

    Each class attribute is a pyspark ``Param`` whose description string is the
    user-facing documentation; getters read via ``getOrDefault`` and setters
    write via ``_set`` (returning ``self`` for chaining, per ``Params._set``).

    Fix: the ``balanceClasses`` Param, its getter and its setter were each
    defined twice in this class; the later duplicates silently shadowed the
    earlier, identical definitions and have been removed. Behavior is
    otherwise unchanged.
    """

    ##
    # Param definitions
    ##
    learnRate = Param(
        Params._dummy(),
        "learnRate",
        "Learning rate (from 0.0 to 1.0)",
        H2OTypeConverters.toFloat())

    learnRateAnnealing = Param(
        Params._dummy(),
        "learnRateAnnealing",
        "Scale the learning rate by this factor after each tree (e.g., 0.99 or 0.999)",
        H2OTypeConverters.toFloat())

    colSampleRate = Param(
        Params._dummy(),
        "colSampleRate",
        "Column sample rate (from 0.0 to 1.0)",
        H2OTypeConverters.toFloat())

    maxAbsLeafnodePred = Param(
        Params._dummy(),
        "maxAbsLeafnodePred",
        "Maximum absolute value of a leaf node prediction",
        H2OTypeConverters.toFloat())

    predNoiseBandwidth = Param(
        Params._dummy(),
        "predNoiseBandwidth",
        "Bandwidth (sigma) of Gaussian multiplicative noise ~N(1,sigma) for tree node predictions",
        H2OTypeConverters.toFloat())

    classSamplingFactors = Param(
        Params._dummy(),
        "classSamplingFactors",
        "Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors "
        "will be automatically computed to obtain class balance during training. Requires balance_classes.",
        H2OTypeConverters.toNullableListFloat())

    checkConstantResponse = Param(
        Params._dummy(),
        "checkConstantResponse",
        "Check if response column is constant. If enabled, then an exception is thrown if the response column "
        "is a constant value.If disabled, then model will train regardless of the response column being a constant "
        "value or not.",
        H2OTypeConverters.toBoolean())

    customDistributionFunc = Param(
        Params._dummy(),
        "customDistributionFunc",
        "Reference to custom distribution, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    customMetricFunc = Param(
        Params._dummy(),
        "customMetricFunc",
        "Reference to custom evaluation function, format: `language:keyName=funcName`",
        H2OTypeConverters.toNullableString())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum allowed runtime in seconds for model training. Use 0 to disable.",
        H2OTypeConverters.toFloat())

    foldAssignment = Param(
        Params._dummy(),
        "foldAssignment",
        "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will "
        "stratify the folds based on the response variable, for classification problems.",
        H2OTypeConverters.toEnumString("hex.Model$Parameters$FoldAssignmentScheme"))

    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Automatically export generated models to this directory.",
        H2OTypeConverters.toNullableString())

    maxAfterBalanceSize = Param(
        Params._dummy(),
        "maxAfterBalanceSize",
        "Maximum relative size of the training data after balancing class counts (can be less than 1.0). "
        "Requires balance_classes.",
        H2OTypeConverters.toFloat())

    calibrateModel = Param(
        Params._dummy(),
        "calibrateModel",
        "Use Platt Scaling to calculate calibrated class probabilities. Calibration can provide more accurate "
        "estimates of class probabilities.",
        H2OTypeConverters.toBoolean())

    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Names of columns to ignore for training.",
        H2OTypeConverters.toNullableListString())

    ignoreConstCols = Param(
        Params._dummy(),
        "ignoreConstCols",
        "Ignore constant columns.",
        H2OTypeConverters.toBoolean())

    balanceClasses = Param(
        Params._dummy(),
        "balanceClasses",
        "Balance training data class counts via over/under-sampling (for imbalanced data).",
        H2OTypeConverters.toBoolean())

    huberAlpha = Param(
        Params._dummy(),
        "huberAlpha",
        "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss,"
        " must be between 0 and 1).",
        H2OTypeConverters.toFloat())

    tweediePower = Param(
        Params._dummy(),
        "tweediePower",
        "Tweedie power for Tweedie regression, must be between 1 and 2.",
        H2OTypeConverters.toFloat())

    scoreEachIteration = Param(
        Params._dummy(),
        "scoreEachIteration",
        "Whether to score during each iteration of model training.",
        H2OTypeConverters.toBoolean())

    categoricalEncoding = Param(
        Params._dummy(),
        "categoricalEncoding",
        "Encoding scheme for categorical features",
        H2OTypeConverters.toEnumString("hex.Model$Parameters$CategoricalEncodingScheme"))

    maxCategoricalLevels = Param(
        Params._dummy(),
        "maxCategoricalLevels",
        "For every categorical feature, only use this many most frequent categorical levels for model training. "
        "Only used for categorical_encoding == EnumLimited.",
        H2OTypeConverters.toInt())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Whether to keep the cross-validation models.",
        H2OTypeConverters.toBoolean())

    ##
    # Getters
    ##
    def getLearnRate(self):
        return self.getOrDefault(self.learnRate)

    def getLearnRateAnnealing(self):
        return self.getOrDefault(self.learnRateAnnealing)

    def getColSampleRate(self):
        return self.getOrDefault(self.colSampleRate)

    def getMaxAbsLeafnodePred(self):
        return self.getOrDefault(self.maxAbsLeafnodePred)

    def getPredNoiseBandwidth(self):
        return self.getOrDefault(self.predNoiseBandwidth)

    def getClassSamplingFactors(self):
        return self.getOrDefault(self.classSamplingFactors)

    def getCheckConstantResponse(self):
        return self.getOrDefault(self.checkConstantResponse)

    def getCustomDistributionFunc(self):
        return self.getOrDefault(self.customDistributionFunc)

    def getCustomMetricFunc(self):
        return self.getOrDefault(self.customMetricFunc)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getFoldAssignment(self):
        return self.getOrDefault(self.foldAssignment)

    def getExportCheckpointsDir(self):
        return self.getOrDefault(self.exportCheckpointsDir)

    def getMaxAfterBalanceSize(self):
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getCalibrateModel(self):
        return self.getOrDefault(self.calibrateModel)

    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getIgnoreConstCols(self):
        return self.getOrDefault(self.ignoreConstCols)

    def getBalanceClasses(self):
        return self.getOrDefault(self.balanceClasses)

    def getHuberAlpha(self):
        return self.getOrDefault(self.huberAlpha)

    def getTweediePower(self):
        return self.getOrDefault(self.tweediePower)

    def getScoreEachIteration(self):
        return self.getOrDefault(self.scoreEachIteration)

    def getCategoricalEncoding(self):
        return self.getOrDefault(self.categoricalEncoding)

    def getMaxCategoricalLevels(self):
        return self.getOrDefault(self.maxCategoricalLevels)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    ##
    # Setters
    ##
    def setLearnRate(self, value):
        return self._set(learnRate=value)

    def setLearnRateAnnealing(self, value):
        return self._set(learnRateAnnealing=value)

    def setColSampleRate(self, value):
        return self._set(colSampleRate=value)

    def setMaxAbsLeafnodePred(self, value):
        return self._set(maxAbsLeafnodePred=value)

    def setPredNoiseBandwidth(self, value):
        return self._set(predNoiseBandwidth=value)

    def setClassSamplingFactors(self, value):
        return self._set(classSamplingFactors=value)

    def setCheckConstantResponse(self, value):
        return self._set(checkConstantResponse=value)

    def setCustomDistributionFunc(self, value):
        return self._set(customDistributionFunc=value)

    def setCustomMetricFunc(self, value):
        return self._set(customMetricFunc=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setFoldAssignment(self, value):
        return self._set(foldAssignment=value)

    def setExportCheckpointsDir(self, value):
        return self._set(exportCheckpointsDir=value)

    def setMaxAfterBalanceSize(self, value):
        return self._set(maxAfterBalanceSize=value)

    def setCalibrateModel(self, value):
        return self._set(calibrateModel=value)

    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setIgnoreConstCols(self, value):
        return self._set(ignoreConstCols=value)

    def setBalanceClasses(self, value):
        return self._set(balanceClasses=value)

    def setHuberAlpha(self, value):
        return self._set(huberAlpha=value)

    def setTweediePower(self, value):
        return self._set(tweediePower=value)

    def setScoreEachIteration(self, value):
        return self._set(scoreEachIteration=value)

    def setCategoricalEncoding(self, value):
        return self._set(categoricalEncoding=value)

    def setMaxCategoricalLevels(self, value):
        return self._set(maxCategoricalLevels=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)
class H2OGLMParams(H2OAlgoSupervisedParams):
    """Spark Param definitions plus getters/setters for the H2O GLM algorithm.

    Each class attribute is a pyspark ``Param``; getters read via
    ``getOrDefault`` and setters write via ``_set`` (which returns ``self``,
    enabling call chaining).
    """

    ##
    # Param definitions
    ##
    standardize = Param(
        Params._dummy(),
        "standardize",
        "standardize",
        H2OTypeConverters.toBoolean())

    family = Param(
        Params._dummy(),
        "family",
        "family",
        H2OTypeConverters.toEnumString("hex.glm.GLMModel$GLMParameters$Family"))

    link = Param(
        Params._dummy(),
        "link",
        "link",
        H2OTypeConverters.toEnumString("hex.glm.GLMModel$GLMParameters$Link"))

    solver = Param(
        Params._dummy(),
        "solver",
        "solver",
        H2OTypeConverters.toEnumString("hex.glm.GLMModel$GLMParameters$Solver"))

    tweedieVariancePower = Param(
        Params._dummy(),
        "tweedieVariancePower",
        "Tweedie variance power",
        H2OTypeConverters.toFloat())

    tweedieLinkPower = Param(
        Params._dummy(),
        "tweedieLinkPower",
        "Tweedie link power",
        H2OTypeConverters.toFloat())

    # "alpha"/"lambda" are named alphaValue/lambdaValue, presumably to avoid
    # clashing with the Python builtin `lambda` keyword — TODO confirm.
    alphaValue = Param(
        Params._dummy(),
        "alphaValue",
        "alphaValue",
        H2OTypeConverters.toNullableListFloat())

    lambdaValue = Param(
        Params._dummy(),
        "lambdaValue",
        "lambdaValue",
        H2OTypeConverters.toNullableListFloat())

    # NOTE(review): the enum class referenced here belongs to DeepLearning
    # parameters, not GLM — verify this is intentional on the backend side.
    missingValuesHandling = Param(
        Params._dummy(),
        "missingValuesHandling",
        "missingValuesHandling",
        H2OTypeConverters.toEnumString(
            "hex.deeplearning.DeepLearningModel$DeepLearningParameters$MissingValuesHandling"))

    prior = Param(
        Params._dummy(),
        "prior",
        "prior",
        H2OTypeConverters.toFloat())

    lambdaSearch = Param(
        Params._dummy(),
        "lambdaSearch",
        "lambda search",
        H2OTypeConverters.toBoolean())

    nlambdas = Param(
        Params._dummy(),
        "nlambdas",
        "nlambdas",
        H2OTypeConverters.toInt())

    nonNegative = Param(
        Params._dummy(),
        "nonNegative",
        "nonNegative",
        H2OTypeConverters.toBoolean())

    exactLambdas = Param(
        Params._dummy(),
        "exactLambdas",
        "exact lambdas",
        H2OTypeConverters.toBoolean())

    lambdaMinRatio = Param(
        Params._dummy(),
        "lambdaMinRatio",
        "lambdaMinRatio",
        H2OTypeConverters.toFloat())

    maxIterations = Param(
        Params._dummy(),
        "maxIterations",
        "maxIterations",
        H2OTypeConverters.toInt())

    intercept = Param(
        Params._dummy(),
        "intercept",
        "intercept",
        H2OTypeConverters.toBoolean())

    betaEpsilon = Param(
        Params._dummy(),
        "betaEpsilon",
        "betaEpsilon",
        H2OTypeConverters.toFloat())

    objectiveEpsilon = Param(
        Params._dummy(),
        "objectiveEpsilon",
        "objectiveEpsilon",
        H2OTypeConverters.toFloat())

    gradientEpsilon = Param(
        Params._dummy(),
        "gradientEpsilon",
        "gradientEpsilon",
        H2OTypeConverters.toFloat())

    objReg = Param(
        Params._dummy(),
        "objReg",
        "objReg",
        H2OTypeConverters.toFloat())

    computePValues = Param(
        Params._dummy(),
        "computePValues",
        "computePValues",
        H2OTypeConverters.toBoolean())

    removeCollinearCols = Param(
        Params._dummy(),
        "removeCollinearCols",
        "removeCollinearCols",
        H2OTypeConverters.toBoolean())

    interactions = Param(
        Params._dummy(),
        "interactions",
        "interactions",
        H2OTypeConverters.toNullableListString())

    # NOTE(review): no type converter here; setInteractionPairs() below
    # validates the value manually with assert_is_type instead.
    interactionPairs = Param(
        Params._dummy(),
        "interactionPairs",
        "interactionPairs")

    earlyStopping = Param(
        Params._dummy(),
        "earlyStopping",
        "earlyStopping",
        H2OTypeConverters.toBoolean())

    ##
    # Getters
    ##
    def getStandardize(self):
        return self.getOrDefault(self.standardize)

    def getFamily(self):
        return self.getOrDefault(self.family)

    def getLink(self):
        return self.getOrDefault(self.link)

    def getSolver(self):
        return self.getOrDefault(self.solver)

    def getTweedieVariancePower(self):
        return self.getOrDefault(self.tweedieVariancePower)

    def getTweedieLinkPower(self):
        return self.getOrDefault(self.tweedieLinkPower)

    def getAlphaValue(self):
        return self.getOrDefault(self.alphaValue)

    def getLambdaValue(self):
        return self.getOrDefault(self.lambdaValue)

    def getMissingValuesHandling(self):
        return self.getOrDefault(self.missingValuesHandling)

    def getPrior(self):
        return self.getOrDefault(self.prior)

    def getLambdaSearch(self):
        return self.getOrDefault(self.lambdaSearch)

    def getNlambdas(self):
        return self.getOrDefault(self.nlambdas)

    def getNonNegative(self):
        return self.getOrDefault(self.nonNegative)

    def getExactLambdas(self):
        return self.getOrDefault(self.exactLambdas)

    def getLambdaMinRatio(self):
        return self.getOrDefault(self.lambdaMinRatio)

    def getMaxIterations(self):
        return self.getOrDefault(self.maxIterations)

    def getIntercept(self):
        return self.getOrDefault(self.intercept)

    def getBetaEpsilon(self):
        return self.getOrDefault(self.betaEpsilon)

    def getObjectiveEpsilon(self):
        return self.getOrDefault(self.objectiveEpsilon)

    def getGradientEpsilon(self):
        return self.getOrDefault(self.gradientEpsilon)

    def getObjReg(self):
        return self.getOrDefault(self.objReg)

    def getComputePValues(self):
        return self.getOrDefault(self.computePValues)

    def getRemoveCollinearCols(self):
        return self.getOrDefault(self.removeCollinearCols)

    def getInteractions(self):
        return self.getOrDefault(self.interactions)

    def getInteractionPairs(self):
        return self.getOrDefault(self.interactionPairs)

    def getEarlyStopping(self):
        return self.getOrDefault(self.earlyStopping)

    ##
    # Setters
    ##
    def setStandardize(self, value):
        return self._set(standardize=value)

    def setFamily(self, value):
        return self._set(family=value)

    def setLink(self, value):
        return self._set(link=value)

    def setSolver(self, value):
        return self._set(solver=value)

    def setTweedieVariancePower(self, value):
        return self._set(tweedieVariancePower=value)

    def setTweedieLinkPower(self, value):
        return self._set(tweedieLinkPower=value)

    def setAlphaValue(self, value):
        return self._set(alphaValue=value)

    def setLambdaValue(self, value):
        return self._set(lambdaValue=value)

    def setMissingValuesHandling(self, value):
        return self._set(missingValuesHandling=value)

    def setPrior(self, value):
        return self._set(prior=value)

    def setLambdaSearch(self, value):
        return self._set(lambdaSearch=value)

    def setNlambdas(self, value):
        return self._set(nlambdas=value)

    def setNonNegative(self, value):
        return self._set(nonNegative=value)

    def setExactLambdas(self, value):
        return self._set(exactLambdas=value)

    def setLambdaMinRatio(self, value):
        return self._set(lambdaMinRatio=value)

    def setMaxIterations(self, value):
        return self._set(maxIterations=value)

    def setIntercept(self, value):
        return self._set(intercept=value)

    def setBetaEpsilon(self, value):
        return self._set(betaEpsilon=value)

    def setObjectiveEpsilon(self, value):
        return self._set(objectiveEpsilon=value)

    def setGradientEpsilon(self, value):
        return self._set(gradientEpsilon=value)

    def setObjReg(self, value):
        return self._set(objReg=value)

    def setComputePValues(self, value):
        return self._set(computePValues=value)

    def setRemoveCollinearCols(self, value):
        return self._set(removeCollinearCols=value)

    def setInteractions(self, value):
        return self._set(interactions=value)

    def setInteractionPairs(self, value):
        # Manual validation because the Param has no converter: value must be
        # None or a list of (str, str) tuples.
        assert_is_type(value, None, [(str, str)])
        return self._set(interactionPairs=value)

    def setEarlyStopping(self, value):
        return self._set(earlyStopping=value)
class H2OAutoMLParams(H2OCommonSupervisedParams, HasMonotoneConstraints):
    """Spark Param definitions plus getters/setters for H2O AutoML.

    Each class attribute is a pyspark ``Param``; getters read via
    ``getOrDefault`` and setters write via ``_set`` (returning ``self`` so
    calls can be chained).

    Fix: ``getTryMutations``/``setTryMutations`` referenced a ``tryMutations``
    Param that is not defined on this class (nor on any visible parent), so
    calling either method would raise ``AttributeError``. Both dead accessors
    were removed.
    """

    ##
    # Param definitions
    ##
    ignoredCols = Param(
        Params._dummy(),
        "ignoredCols",
        "Ignored column names",
        H2OTypeConverters.toNullableListString())

    includeAlgos = Param(
        Params._dummy(),
        "includeAlgos",
        "Algorithms to include when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo", True))

    excludeAlgos = Param(
        Params._dummy(),
        "excludeAlgos",
        "Algorithms to exclude when using automl",
        H2OTypeConverters.toEnumListString("ai.h2o.automl.Algo", True))

    projectName = Param(
        Params._dummy(),
        "projectName",
        "identifier for models that should be grouped together in the leaderboard "
        "(e.g., airlines and iris)",
        H2OTypeConverters.toNullableString())

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "Maximum time in seconds for automl to be running",
        H2OTypeConverters.toFloat())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "Stopping rounds",
        H2OTypeConverters.toInt())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "Stopping tolerance",
        H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "Stopping metric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    sortMetric = Param(
        Params._dummy(),
        "sortMetric",
        "Sort metric for the AutoML leaderboard",
        H2OTypeConverters.toEnumString("ai.h2o.sparkling.ml.utils.H2OAutoMLSortMetric"))

    balanceClasses = Param(
        Params._dummy(),
        "balanceClasses",
        "Balance classes",
        H2OTypeConverters.toBoolean())

    classSamplingFactors = Param(
        Params._dummy(),
        "classSamplingFactors",
        "Class sampling factors",
        H2OTypeConverters.toNullableListFloat())

    maxAfterBalanceSize = Param(
        Params._dummy(),
        "maxAfterBalanceSize",
        "Max after balance size",
        H2OTypeConverters.toFloat())

    keepCrossValidationPredictions = Param(
        Params._dummy(),
        "keepCrossValidationPredictions",
        "Keep cross validation predictions",
        H2OTypeConverters.toBoolean())

    keepCrossValidationModels = Param(
        Params._dummy(),
        "keepCrossValidationModels",
        "Keep cross validation models",
        H2OTypeConverters.toBoolean())

    keepCrossValidationFoldAssignment = Param(
        Params._dummy(),
        "keepCrossValidationFoldAssignment",
        "Whether to keep cross-validation assignments.",
        H2OTypeConverters.toBoolean())

    maxModels = Param(
        Params._dummy(),
        "maxModels",
        "Max models to train in AutoML",
        H2OTypeConverters.toInt())

    maxRuntimeSecsPerModel = Param(
        Params._dummy(),
        "maxRuntimeSecsPerModel",
        "Maximum time to spend on each individual model (optional).",
        H2OTypeConverters.toFloat())

    exportCheckpointsDir = Param(
        Params._dummy(),
        "exportCheckpointsDir",
        "Path to a directory where every generated model will be stored.",
        H2OTypeConverters.toNullableString())

    exploitationRatio = Param(
        Params._dummy(),
        "exploitationRatio",
        "The budget ratio (between 0 and 1) dedicated to the exploitation (vs exploration) phase.",
        H2OTypeConverters.toFloat())

    ##
    # Getters
    ##
    def getIgnoredCols(self):
        return self.getOrDefault(self.ignoredCols)

    def getExcludeAlgos(self):
        return self.getOrDefault(self.excludeAlgos)

    def getIncludeAlgos(self):
        return self.getOrDefault(self.includeAlgos)

    def getProjectName(self):
        return self.getOrDefault(self.projectName)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getSortMetric(self):
        return self.getOrDefault(self.sortMetric)

    def getBalanceClasses(self):
        return self.getOrDefault(self.balanceClasses)

    def getClassSamplingFactors(self):
        return self.getOrDefault(self.classSamplingFactors)

    def getMaxAfterBalanceSize(self):
        return self.getOrDefault(self.maxAfterBalanceSize)

    def getKeepCrossValidationPredictions(self):
        return self.getOrDefault(self.keepCrossValidationPredictions)

    def getKeepCrossValidationModels(self):
        return self.getOrDefault(self.keepCrossValidationModels)

    def getKeepCrossValidationFoldAssignment(self):
        return self.getOrDefault(self.keepCrossValidationFoldAssignment)

    def getMaxModels(self):
        return self.getOrDefault(self.maxModels)

    def getMaxRuntimeSecsPerModel(self):
        return self.getOrDefault(self.maxRuntimeSecsPerModel)

    def getExportCheckpointsDir(self):
        return self.getOrDefault(self.exportCheckpointsDir)

    def getExploitationRatio(self):
        return self.getOrDefault(self.exploitationRatio)

    ##
    # Setters
    ##
    def setIgnoredCols(self, value):
        return self._set(ignoredCols=value)

    def setIncludeAlgos(self, value):
        return self._set(includeAlgos=value)

    def setExcludeAlgos(self, value):
        return self._set(excludeAlgos=value)

    def setProjectName(self, value):
        return self._set(projectName=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setSortMetric(self, value):
        return self._set(sortMetric=value)

    def setBalanceClasses(self, value):
        return self._set(balanceClasses=value)

    def setClassSamplingFactors(self, value):
        return self._set(classSamplingFactors=value)

    def setMaxAfterBalanceSize(self, value):
        return self._set(maxAfterBalanceSize=value)

    def setKeepCrossValidationPredictions(self, value):
        return self._set(keepCrossValidationPredictions=value)

    def setKeepCrossValidationModels(self, value):
        return self._set(keepCrossValidationModels=value)

    def setKeepCrossValidationFoldAssignment(self, value):
        return self._set(keepCrossValidationFoldAssignment=value)

    def setMaxModels(self, value):
        return self._set(maxModels=value)

    def setMaxRuntimeSecsPerModel(self, value):
        return self._set(maxRuntimeSecsPerModel=value)

    def setExportCheckpointsDir(self, value):
        return self._set(exportCheckpointsDir=value)

    def setExploitationRatio(self, value):
        return self._set(exploitationRatio=value)
class H2OTargetEncoderParams(Params):
    """Spark Param definitions and read accessors for the H2O target encoder.

    This params mixin is read-only: it declares the encoder's configuration
    Params and exposes getters, but no setters.
    """

    ##
    # Param definitions
    ##
    foldCol = Param(
        Params._dummy(),
        "foldCol",
        "Fold column name",
        H2OTypeConverters.toNullableString())

    labelCol = Param(
        Params._dummy(),
        "labelCol",
        "Label column name",
        H2OTypeConverters.toString())

    inputCols = Param(
        Params._dummy(),
        "inputCols",
        "Names of columns that will be transformed",
        H2OTypeConverters.toListString())

    outputCols = Param(
        Params._dummy(),
        "outputCols",
        "Names of columns representing the result of target encoding",
        H2OTypeConverters.toListString())

    holdoutStrategy = Param(
        Params._dummy(),
        "holdoutStrategy",
        """A strategy deciding what records will be excluded when calculating the target average on the training dataset.
        Options:
        None - All rows are considered for the calculation
        LeaveOneOut - All rows except the row the calculation is made for
        KFold - Only out-of-fold data is considered (The option requires foldCol to be set.""",
        H2OTypeConverters.toEnumString(
            "ai.h2o.targetencoding.TargetEncoder$DataLeakageHandlingStrategy"))

    blendedAvgEnabled = Param(
        Params._dummy(),
        "blendedAvgEnabled",
        "If set, the target average becomes a weighted average of the posterior average for a given "
        "categorical level and the prior average of the target. The weight is determined by the size "
        "of the given group that the row belongs to. By default, the blended average is disabled.",
        H2OTypeConverters.toBoolean())

    blendedAvgInflectionPoint = Param(
        Params._dummy(),
        "blendedAvgInflectionPoint",
        "A parameter of the blended average. The bigger number is set, the groups relatively bigger to the "
        "overall data set size will consider the global target value as a component in the weighted average. "
        "The default value is 10.",
        H2OTypeConverters.toFloat())

    blendedAvgSmoothing = Param(
        Params._dummy(),
        "blendedAvgSmoothing",
        "A parameter of blended average. Controls the rate of transition between a group target value "
        "and a global target value. The default value is 20.",
        H2OTypeConverters.toFloat())

    noise = Param(
        Params._dummy(),
        "noise",
        "Amount of random noise added to output values. The default value is 0.01",
        H2OTypeConverters.toFloat())

    noiseSeed = Param(
        Params._dummy(),
        "noiseSeed",
        "A seed of the generator producing the random noise",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getFoldCol(self):
        return self.getOrDefault(self.foldCol)

    def getLabelCol(self):
        return self.getOrDefault(self.labelCol)

    def getInputCols(self):
        return self.getOrDefault(self.inputCols)

    def getOutputCols(self):
        """Return the configured output columns, or derive them by suffixing
        every input column with ``_te`` when none were configured."""
        configured = self.getOrDefault(self.outputCols)
        if configured:
            return configured
        return [inputCol + "_te" for inputCol in self.getInputCols()]

    def getHoldoutStrategy(self):
        return self.getOrDefault(self.holdoutStrategy)

    def getBlendedAvgEnabled(self):
        return self.getOrDefault(self.blendedAvgEnabled)

    def getBlendedAvgInflectionPoint(self):
        return self.getOrDefault(self.blendedAvgInflectionPoint)

    def getBlendedAvgSmoothing(self):
        return self.getOrDefault(self.blendedAvgSmoothing)

    def getNoise(self):
        return self.getOrDefault(self.noise)

    def getNoiseSeed(self):
        return self.getOrDefault(self.noiseSeed)
class H2OGridSearchParams(H2OGridSearchRandomDiscreteCriteriaParams,
                          H2OGridSearchCartesianCriteriaParams,
                          H2OGridSearchCommonCriteriaParams):
    """Spark Param definitions plus getters/setters for H2O grid search.

    ``getAlgo``/``setAlgo`` go through the backing Java object (Py4J) rather
    than plain ``getOrDefault``/``_set``, because the algorithm itself is
    held on the JVM side.
    """

    ##
    # Param definitions
    ##
    algo = Param(
        Params._dummy(),
        "algo",
        "Algo to run grid search on",
        H2OTypeConverters.toH2OGridSearchSupportedAlgo())

    hyperParameters = Param(
        Params._dummy(),
        "hyperParameters",
        "Grid Search Hyper Params map",
        H2OTypeConverters.toDictionaryWithAnyElements())

    selectBestModelBy = Param(
        Params._dummy(),
        "selectBestModelBy",
        "Specifies the metric which is used for comparing and sorting the models returned by the grid.",
        H2OTypeConverters.toEnumString(
            "ai.h2o.sparkling.ml.internals.H2OMetric"))

    parallelism = Param(
        Params._dummy(),
        "parallelism",
        """Level of model-building parallelism, the possible values are:
           0 -> H2O selects parallelism level based on cluster configuration, such as number of cores
           1 -> Sequential model building, no parallelism
           n>1 -> n models will be built in parallel if possible""",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getAlgo(self):
        """Reconstruct the Python wrapper for the Java-side algorithm.

        Reads the algorithm from the backing Java object, instantiates the
        matching Python wrapper class by name, then rebinds the wrapper to the
        existing Java object. Returns None when no algorithm is set; raises
        ValueError for an algorithm name with no Python wrapper here.
        """
        javaAlgo = self._java_obj.getAlgo()
        if javaAlgo is None:
            return None
        algoName = javaAlgo.parameters().algoName()
        # Imports are deferred and per-branch so only the needed wrapper
        # module member is loaded.
        if algoName == "GBM":
            from ai.h2o.sparkling.ml.algos import H2OGBM
            algo = H2OGBM()
        elif algoName == "DeepLearning":
            from ai.h2o.sparkling.ml.algos import H2ODeepLearning
            algo = H2ODeepLearning()
        elif algoName == "XGBoost":
            from ai.h2o.sparkling.ml.algos import H2OXGBoost
            algo = H2OXGBoost()
        elif algoName == "GLM":
            from ai.h2o.sparkling.ml.algos import H2OGLM
            algo = H2OGLM()
        elif algoName == "DRF":
            from ai.h2o.sparkling.ml.algos import H2ODRF
            algo = H2ODRF()
        elif algoName == "KMeans":
            from ai.h2o.sparkling.ml.algos import H2OKMeans
            algo = H2OKMeans()
        else:
            raise ValueError('Unsupported algorithm for H2OGridSearch')
        # Order matters: adopt the Java object's uid, attach the Java object,
        # then pull its parameter values into the Python wrapper.
        algo._resetUid(javaAlgo.uid())
        algo._java_obj = javaAlgo
        algo._transfer_params_from_java()
        return algo

    def getHyperParameters(self):
        return self.getOrDefault(self.hyperParameters)

    def getSelectBestModelBy(self):
        return self.getOrDefault(self.selectBestModelBy)

    def getParallelism(self):
        return self.getOrDefault(self.parallelism)

    ##
    # Setters
    ##
    def setAlgo(self, value):
        # Push the new algo to the JVM immediately (unlike the other setters,
        # which rely on the regular param transfer) so the Java side stays in
        # sync with getAlgo() above.
        self._set(algo=value)
        self._transfer_params_to_java()
        return self

    def setHyperParameters(self, value):
        return self._set(hyperParameters=value)

    def setSelectBestModelBy(self, value):
        return self._set(selectBestModelBy=value)

    def setParallelism(self, value):
        return self._set(parallelism=value)
class H2OKMeansParams(H2OAlgoUnsupervisedParams): maxIterations = Param( Params._dummy(), "maxIterations", "Maximum number of KMeans iterations to find the centroids.", H2OTypeConverters.toInt()) standardize = Param( Params._dummy(), "standardize", "Standardize the numeric columns to have a mean of zero and unit variance.", H2OTypeConverters.toBoolean()) init = Param( Params._dummy(), "init", "Initialization mode for finding the initial cluster centers.", H2OTypeConverters.toEnumString("hex.kmeans.KMeans$Initialization")) userPoints = Param( Params._dummy(), "userPoints", "This option enables to specify array of points, where each point represents coordinates of " "an initial cluster center. The user-specified points must have the same number of columns " "as the training observations. The number of rows must equal the number of clusters.", H2OTypeConverters.toNullableListListFloat()) estimateK = Param( Params._dummy(), "estimateK", "If enabled, the algorithm tries to identify optimal number of clusters, up to k clusters.", H2OTypeConverters.toBoolean()) k = Param( Params._dummy(), "k", "Number of clusters to generate.", H2OTypeConverters.toInt()) # # Getters # def getMaxIterations(self): return self.getOrDefault(self.maxIterations) def getStandardize(self): return self.getOrDefault(self.standardize) def getInit(self): return self.getOrDefault(self.init) def getUserPoints(self): return self.getOrDefault(self.userPoints) def getEstimateK(self): return self.getOrDefault(self.estimateK) def getK(self): return self.getOrDefault(self.k) # # Setters # def setMaxIterations(self, value): return self._set(maxIterations=value) def setStandardize(self, value): return self._set(standardize=value) def setInit(self, value): return self._set(init=value) def setUserPoints(self, value): return self._set(userPoints=value) def setEstimateK(self, value): return self._set(estimateK=value) def setK(self, value): return self._set(k=value)
class H2OSharedTreeParams(H2OAlgoSupervisedParams, H2OTreeBasedSupervisedMOJOParams):
    """Spark Param definitions plus getters/setters shared by H2O tree-based
    algorithms (GBM, DRF, ...).

    Note: ``setNtrees`` below writes the ``ntrees`` Param declared on the
    ``H2OTreeBasedSupervisedMOJOParams`` parent (which also provides the
    getter).
    """

    ##
    # Param definitions
    ##
    maxDepth = Param(
        Params._dummy(),
        "maxDepth",
        "Maximum tree depth",
        H2OTypeConverters.toInt())

    minRows = Param(
        Params._dummy(),
        "minRows",
        "Fewest allowed (weighted) observations in a leaf",
        H2OTypeConverters.toFloat())

    nbins = Param(
        Params._dummy(),
        "nbins",
        "For numerical columns (real/int), build a histogram of (at least) this many bins, then split "
        "at the best point",
        H2OTypeConverters.toInt())

    nbinsCats = Param(
        Params._dummy(),
        "nbinsCats",
        "For categorical columns (factors), build a histogram of this many bins, then split at the best "
        "point. Higher values can lead to more overfitting",
        H2OTypeConverters.toInt())

    minSplitImprovement = Param(
        Params._dummy(),
        "minSplitImprovement",
        "Minimum relative improvement in squared error reduction for a split to happen",
        H2OTypeConverters.toFloat())

    histogramType = Param(
        Params._dummy(),
        "histogramType",
        "What type of histogram to use for finding optimal split points",
        H2OTypeConverters.toEnumString(
            "hex.tree.SharedTreeModel$SharedTreeParameters$HistogramType"))

    # Deprecated upstream; kept only so existing configurations still parse.
    r2Stopping = Param(
        Params._dummy(),
        "r2Stopping",
        "r2_stopping is no longer supported and will be ignored if set - please use stopping_rounds, "
        "stopping_metric and stopping_tolerance instead. Previous version of H2O would stop making trees "
        "when the R^2 metric equals or exceeds this",
        H2OTypeConverters.toFloat())

    nbinsTopLevel = Param(
        Params._dummy(),
        "nbinsTopLevel",
        "For numerical columns (real/int), build a histogram of (at most) this many bins at the root "
        "level, then decrease by factor of two per level",
        H2OTypeConverters.toInt())

    buildTreeOneNode = Param(
        Params._dummy(),
        "buildTreeOneNode",
        "Run on one node only; no network overhead but fewer cpus used. Suitable for small datasets.",
        H2OTypeConverters.toBoolean())

    scoreTreeInterval = Param(
        Params._dummy(),
        "scoreTreeInterval",
        "Score the model after every so many trees. Disabled if set to 0.",
        H2OTypeConverters.toInt())

    sampleRate = Param(
        Params._dummy(),
        "sampleRate",
        "Row sample rate per tree (from 0.0 to 1.0)",
        H2OTypeConverters.toFloat())

    sampleRatePerClass = Param(
        Params._dummy(),
        "sampleRatePerClass",
        "A list of row sample rates per class (relative fraction for each class, from 0.0 to 1.0), for each tree",
        H2OTypeConverters.toNullableListFloat())

    colSampleRateChangePerLevel = Param(
        Params._dummy(),
        "colSampleRateChangePerLevel",
        "Relative change of the column sampling rate for every level (from 0.0 to 2.0)",
        H2OTypeConverters.toFloat())

    colSampleRatePerTree = Param(
        Params._dummy(),
        "colSampleRatePerTree",
        "Column sample rate per tree (from 0.0 to 1.0)",
        H2OTypeConverters.toFloat())

    ##
    # Getters
    ##
    def getMaxDepth(self):
        return self.getOrDefault(self.maxDepth)

    def getMinRows(self):
        return self.getOrDefault(self.minRows)

    def getNbins(self):
        return self.getOrDefault(self.nbins)

    def getNbinsCats(self):
        return self.getOrDefault(self.nbinsCats)

    def getMinSplitImprovement(self):
        return self.getOrDefault(self.minSplitImprovement)

    def getHistogramType(self):
        return self.getOrDefault(self.histogramType)

    def getR2Stopping(self):
        return self.getOrDefault(self.r2Stopping)

    def getNbinsTopLevel(self):
        return self.getOrDefault(self.nbinsTopLevel)

    def getBuildTreeOneNode(self):
        return self.getOrDefault(self.buildTreeOneNode)

    def getScoreTreeInterval(self):
        return self.getOrDefault(self.scoreTreeInterval)

    def getSampleRate(self):
        return self.getOrDefault(self.sampleRate)

    def getSampleRatePerClass(self):
        return self.getOrDefault(self.sampleRatePerClass)

    def getColSampleRateChangePerLevel(self):
        return self.getOrDefault(self.colSampleRateChangePerLevel)

    def getColSampleRatePerTree(self):
        return self.getOrDefault(self.colSampleRatePerTree)

    ##
    # Setters
    ##
    def setNtrees(self, value):
        # `ntrees` is declared on the H2OTreeBasedSupervisedMOJOParams parent.
        return self._set(ntrees=value)

    def setMaxDepth(self, value):
        return self._set(maxDepth=value)

    def setMinRows(self, value):
        return self._set(minRows=value)

    def setNbins(self, value):
        return self._set(nbins=value)

    def setNbinsCats(self, value):
        return self._set(nbinsCats=value)

    def setMinSplitImprovement(self, value):
        return self._set(minSplitImprovement=value)

    def setHistogramType(self, value):
        return self._set(histogramType=value)

    def setR2Stopping(self, value):
        return self._set(r2Stopping=value)

    def setNbinsTopLevel(self, value):
        return self._set(nbinsTopLevel=value)

    def setBuildTreeOneNode(self, value):
        return self._set(buildTreeOneNode=value)

    def setScoreTreeInterval(self, value):
        return self._set(scoreTreeInterval=value)

    def setSampleRate(self, value):
        return self._set(sampleRate=value)

    def setSampleRatePerClass(self, value):
        return self._set(sampleRatePerClass=value)

    def setColSampleRateChangePerLevel(self, value):
        return self._set(colSampleRateChangePerLevel=value)

    def setColSampleRatePerTree(self, value):
        return self._set(colSampleRatePerTree=value)
class H2OCommonParams(H2OMOJOAlgoSharedParams):
    """Parameters shared by H2O algorithm wrappers: fold/weight column names,
    train/validation split ratio, seed, number of CV folds, and options for
    converting string columns to categoricals.

    Getters return the set value or the Param default; setters follow the
    Spark ML fluent convention (``self._set`` returns ``self``).
    """

    ##
    # Param definitions
    ##
    foldCol = Param(
        Params._dummy(),
        "foldCol",
        "Fold column name",
        H2OTypeConverters.toNullableString())

    weightCol = Param(
        Params._dummy(),
        "weightCol",
        "Weight column name",
        H2OTypeConverters.toNullableString())

    splitRatio = Param(
        Params._dummy(),
        "splitRatio",
        "Accepts values in range [0, 1.0] which determine how large part of dataset is used for training"
        " and for validation. For example, 0.8 -> 80% training 20% validation.",
        H2OTypeConverters.toFloat())

    seed = Param(
        Params._dummy(),
        "seed",
        "Used to specify seed to reproduce the model run",
        H2OTypeConverters.toInt())

    nfolds = Param(
        Params._dummy(),
        "nfolds",
        "Number of fold columns",
        H2OTypeConverters.toInt())

    allStringColumnsToCategorical = Param(
        Params._dummy(),
        "allStringColumnsToCategorical",
        "Transform all strings columns to categorical",
        H2OTypeConverters.toBoolean())

    columnsToCategorical = Param(
        Params._dummy(),
        "columnsToCategorical",
        "List of columns to convert to categorical before modelling",
        H2OTypeConverters.toListString())

    ##
    # Getters
    ##
    def getFoldCol(self):
        return self.getOrDefault(self.foldCol)

    def getWeightCol(self):
        return self.getOrDefault(self.weightCol)

    def getSplitRatio(self):
        return self.getOrDefault(self.splitRatio)

    def getSeed(self):
        return self.getOrDefault(self.seed)

    def getNfolds(self):
        return self.getOrDefault(self.nfolds)

    def getAllStringColumnsToCategorical(self):
        return self.getOrDefault(self.allStringColumnsToCategorical)

    def getColumnsToCategorical(self):
        return self.getOrDefault(self.columnsToCategorical)

    ##
    # Setters
    ##
    def setFoldCol(self, value):
        return self._set(foldCol=value)

    def setWeightCol(self, value):
        return self._set(weightCol=value)

    def setSplitRatio(self, value):
        return self._set(splitRatio=value)

    def setSeed(self, value):
        return self._set(seed=value)

    def setNfolds(self, value):
        return self._set(nfolds=value)

    def setAllStringColumnsToCategorical(self, value):
        return self._set(allStringColumnsToCategorical=value)

    def setColumnsToCategorical(self, value, *args):
        """Set the columns converted to categorical before modelling.

        Accepts either a list of column names, or a single column name
        followed by further names as varargs.
        """
        assert_is_type(value, [str], str)
        if isinstance(value, str):
            prepared_array = [value]
        else:
            # Copy so the caller's list is not mutated by the appends below.
            prepared_array = list(value)
        for arg in args:
            prepared_array.append(arg)
        # BUG FIX: previously passed `value` here, which silently dropped the
        # varargs and the single-string-to-list wrapping built above.
        return self._set(columnsToCategorical=prepared_array)

    # Setters for parameters which are defined on MOJO as well
    def setPredictionCol(self, value):
        return self._set(predictionCol=value)

    def setDetailedPredictionCol(self, value):
        return self._set(detailedPredictionCol=value)

    def setWithDetailedPredictionCol(self, value):
        return self._set(withDetailedPredictionCol=value)

    def setFeaturesCols(self, value):
        return self._set(featuresCols=value)

    def setConvertUnknownCategoricalLevelsToNa(self, value):
        return self._set(convertUnknownCategoricalLevelsToNa=value)

    def setConvertInvalidNumbersToNa(self, value):
        return self._set(convertInvalidNumbersToNa=value)

    def setNamedMojoOutputColumns(self, value):
        return self._set(namedMojoOutputColumns=value)
class H2OGridSearchParams(H2OCommonSupervisedParams):
    """Grid-search parameters: the wrapped algorithm, the hyper-parameter
    space, the search strategy/limits, and model-selection settings."""

    ##
    # Param definitions
    ##
    algo = Param(
        Params._dummy(),
        "algo",
        "Algo to run grid search on",
        H2OTypeConverters.toH2OGridSearchSupportedAlgo())

    hyperParameters = Param(
        Params._dummy(),
        "hyperParameters",
        "Grid Search Hyper Params map",
        H2OTypeConverters.toDictionaryWithAnyElements())

    strategy = Param(
        Params._dummy(),
        "strategy",
        "strategy",
        H2OTypeConverters.toEnumString("hex.grid.HyperSpaceSearchCriteria$Strategy"))

    maxRuntimeSecs = Param(
        Params._dummy(),
        "maxRuntimeSecs",
        "maxRuntimeSecs",
        H2OTypeConverters.toFloat())

    maxModels = Param(
        Params._dummy(),
        "maxModels",
        "maxModels",
        H2OTypeConverters.toInt())

    stoppingRounds = Param(
        Params._dummy(),
        "stoppingRounds",
        "stoppingRounds",
        H2OTypeConverters.toInt())

    stoppingTolerance = Param(
        Params._dummy(),
        "stoppingTolerance",
        "stoppingTolerance",
        H2OTypeConverters.toFloat())

    stoppingMetric = Param(
        Params._dummy(),
        "stoppingMetric",
        "stoppingMetric",
        H2OTypeConverters.toEnumString("hex.ScoreKeeper$StoppingMetric"))

    selectBestModelBy = Param(
        Params._dummy(),
        "selectBestModelBy",
        "Specifies the metric which is used for comparing and sorting the models returned by the grid.",
        H2OTypeConverters.toEnumString("ai.h2o.sparkling.ml.internals.H2OMetric"))

    parallelism = Param(
        Params._dummy(),
        "parallelism",
        """Level of model-building parallelism, the possible values are: 0 -> H2O selects parallelism level based on cluster configuration, such as number of cores 1 -> Sequential model building, no parallelism n>1 -> n models will be built in parallel if possible""",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getAlgo(self):
        """Return the configured algorithm as a fresh Python wrapper bound to
        the underlying Java algorithm object (uid and params carried over)."""
        javaAlgo = self.getOrDefault(self.algo)
        algoName = javaAlgo.parameters().algoName()
        # Java algo name -> Python wrapper class name; dispatch table instead
        # of an if/elif chain, with a lazy module import on first use.
        wrapperNames = {
            "GBM": "H2OGBM",
            "DeepLearning": "H2ODeepLearning",
            "XGBoost": "H2OXGBoost",
            "GLM": "H2OGLM",
            "DRF": "H2ODRF",
        }
        if algoName not in wrapperNames:
            raise ValueError('Unsupported algorithm for H2OGridSearch')
        import ai.h2o.sparkling.ml.algos
        wrapper = getattr(ai.h2o.sparkling.ml.algos, wrapperNames[algoName])()
        wrapper._resetUid(javaAlgo.uid())
        wrapper._java_obj = javaAlgo
        wrapper._transfer_params_from_java()
        return wrapper

    def getHyperParameters(self):
        return self.getOrDefault(self.hyperParameters)

    def getStrategy(self):
        return self.getOrDefault(self.strategy)

    def getMaxRuntimeSecs(self):
        return self.getOrDefault(self.maxRuntimeSecs)

    def getMaxModels(self):
        return self.getOrDefault(self.maxModels)

    def getStoppingRounds(self):
        return self.getOrDefault(self.stoppingRounds)

    def getStoppingTolerance(self):
        return self.getOrDefault(self.stoppingTolerance)

    def getStoppingMetric(self):
        return self.getOrDefault(self.stoppingMetric)

    def getSelectBestModelBy(self):
        return self.getOrDefault(self.selectBestModelBy)

    def getParallelism(self):
        return self.getOrDefault(self.parallelism)

    ##
    # Setters (fluent: each returns self via Params._set)
    ##
    def setAlgo(self, value):
        return self._set(algo=value)

    def setHyperParameters(self, value):
        return self._set(hyperParameters=value)

    def setStrategy(self, value):
        return self._set(strategy=value)

    def setMaxRuntimeSecs(self, value):
        return self._set(maxRuntimeSecs=value)

    def setMaxModels(self, value):
        return self._set(maxModels=value)

    def setStoppingRounds(self, value):
        return self._set(stoppingRounds=value)

    def setStoppingTolerance(self, value):
        return self._set(stoppingTolerance=value)

    def setStoppingMetric(self, value):
        return self._set(stoppingMetric=value)

    def setSelectBestModelBy(self, value):
        return self._set(selectBestModelBy=value)

    def setParallelism(self, value):
        return self._set(parallelism=value)
class H2OGridSearchParams(H2OGridSearchRandomDiscreteCriteriaParams,
                          H2OGridSearchCartesianCriteriaParams,
                          H2OGridSearchCommonCriteriaParams):
    # Grid-search parameters; search-criteria params come from the mixin bases.

    ##
    # Param definitions
    ##
    algo = Param(
        Params._dummy(),
        "algo",
        "Algo to run grid search on",
        H2OTypeConverters.toH2OGridSearchSupportedAlgo())

    hyperParameters = Param(
        Params._dummy(),
        "hyperParameters",
        "Grid Search Hyper Params map",
        H2OTypeConverters.toDictionaryWithAnyElements())

    selectBestModelBy = Param(
        Params._dummy(),
        "selectBestModelBy",
        "Specifies the metric which is used for comparing and sorting the models returned by the grid.",
        H2OTypeConverters.toEnumString(
            "ai.h2o.sparkling.ml.internals.H2OMetric"))

    parallelism = Param(
        Params._dummy(),
        "parallelism",
        """Level of model-building parallelism, the possible values are: 0 -> H2O selects parallelism level based on cluster configuration, such as number of cores 1 -> Sequential model building, no parallelism n>1 -> n models will be built in parallel if possible""",
        H2OTypeConverters.toInt())

    ##
    # Getters
    ##
    def getAlgo(self):
        # Read the algo straight from the backing Java object rather than the
        # Python Param store (setAlgo pushes params to Java, see below).
        javaAlgo = self._java_obj.getAlgo()
        if javaAlgo is None:
            return None
        # Dispatch on the Java class's simple name: classifier/regressor
        # wrappers live in sub-packages, everything else in the base package.
        algoName = javaAlgo.getClass().getSimpleName()
        if algoName.endswith("Classifier"):
            import ai.h2o.sparkling.ml.algos.classification
            algo = getattr(ai.h2o.sparkling.ml.algos.classification, algoName)()
        elif algoName.endswith("Regressor"):
            import ai.h2o.sparkling.ml.algos.regression
            algo = getattr(ai.h2o.sparkling.ml.algos.regression, algoName)()
        else:
            import ai.h2o.sparkling.ml.algos
            algo = getattr(ai.h2o.sparkling.ml.algos, algoName)()
        # Bind the fresh Python wrapper to the existing Java instance:
        # uid first, then the Java handle, then pull params across.
        algo._resetUid(javaAlgo.uid())
        algo._java_obj = javaAlgo
        algo._transfer_params_from_java()
        return algo

    def getHyperParameters(self):
        return self.getOrDefault(self.hyperParameters)

    def getSelectBestModelBy(self):
        return self.getOrDefault(self.selectBestModelBy)

    def getParallelism(self):
        return self.getOrDefault(self.parallelism)

    ##
    # Setters
    ##
    def setAlgo(self, value):
        # Unlike the other setters, immediately sync to the Java side so that
        # getAlgo (which reads from _java_obj) observes the new value.
        self._set(algo=value)
        self._transfer_params_to_java()
        return self

    def setHyperParameters(self, value):
        return self._set(hyperParameters=value)

    def setSelectBestModelBy(self, value):
        return self._set(selectBestModelBy=value)

    def setParallelism(self, value):
        return self._set(parallelism=value)