def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Post a model-build request and, unless running asynchronously, resolve the finished model.

    Parameters
    ----------
      x : str, int, list or tuple
        Predictor column(s), given either as names or as integer indices into ``tframe.names``.
        Whether indices are used is decided from the first element only.
      y : str, int or None
        Response column, as a name or as an index into ``tframe.names``. None means no response.
      tframe
        Training frame; its ``names`` attribute is used to resolve column indices.
      vframe
        Validation frame, or None.
      kwargs : dict
        Build parameters. This dict is updated in place with ``training_frame``,
        ``validation_frame``, ``response_column`` and ``ignored_columns`` before a converted copy
        is posted. ``offset_column``, ``fold_column`` and ``weights_column`` are optional.

    Returns
    -------
      None. When ``self._future`` is truthy the job is stored in ``self._job`` and the method
      returns without polling; otherwise the job is polled and ``self._resolve_model`` is called.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise fail on the list
    # concatenation below, and an empty sequence must not be indexed.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    # Columns with a special role are never ignored; a role that is not configured is None.
    special_columns = [
        y,
        kwargs.get("offset_column"),
        kwargs.get("fold_column"),
        kwargs.get("weights_column"),
    ]
    ignored_columns = list(set(tframe.names) - set(x + special_columns))
    if ignored_columns:
        kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns]
    else:
        kwargs["ignored_columns"] = None

    # Convert every parameter value through the estimator's frame-to-key helper.
    kwargs = dict((k, H2OEstimator._keyify_if_H2OFrame(v)) for k, v in kwargs.items())
    algo = self._compute_algo()

    model = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo, **kwargs),
                   job_type=(algo + " Model Build"))

    if self._future:
        self._job = model
        return

    model.poll()
    if '_rest_version' in kwargs:
        model_json = H2OConnection.get_json("Models/" + model.dest_key,
                                            _rest_version=kwargs['_rest_version'])["models"][0]
    else:
        model_json = H2OConnection.get_json("Models/" + model.dest_key)["models"][0]
    self._resolve_model(model.dest_key, model_json)
def get_grid(self, sort_by=None, decreasing=None):
    """
    Retrieve an H2OGridSearch instance. Optionally specify a metric by which to sort models and a sort order.

    Parameters
    ----------
      sort_by : str, optional
        A metric by which to sort the models in the grid space. Choices are "logloss",
        "residual_deviance", "mse", "auc", "r2", "accuracy", "precision", "recall", "f1", etc.
      decreasing : bool, optional
        Sort the models in decreasing order of metric if true, otherwise sort in increasing
        order (default).

    Returns
    -------
      This same instance when neither ``sort_by`` nor ``decreasing`` is given; otherwise a new
      H2OGridSearch instance whose models follow the order returned by the backend.

    Raises
    ------
      ValueError
        If the grid returned by the backend contains no models.
    """
    if sort_by is None and decreasing is None:
        return self

    grid_json = H2OConnection.get_json("Grids/" + self._id, sort_by=sort_by,
                                       decreasing=decreasing, _rest_version=99)
    model_ids = grid_json['model_ids']
    if not model_ids:
        # Without at least one model there is nothing to derive the metrics class from;
        # fail with a clear message instead of an IndexError on model_ids[0].
        raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")

    grid = H2OGridSearch(self.model, self.hyper_params, self._id)
    grid.models = [h2o.get_model(key['name']) for key in model_ids]  # reordered

    # The first model's JSON is used to pick the metrics class.
    first_model_json = H2OConnection.get_json("Models/" + model_ids[0]['name'],
                                              _rest_version=99)['models'][0]
    model_class = H2OGridSearch._metrics_class(first_model_json)

    # Build a throwaway instance of the metrics class and copy its state onto the new grid.
    m = model_class()
    m._id = self._id
    m._grid_json = grid_json
    m._parms = grid._parms
    H2OEstimator.mixin(grid, model_class)
    grid.__dict__.update(m.__dict__.copy())
    return grid
def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Post a model-build request and, unless running asynchronously, resolve the finished model.

    Parameters
    ----------
      x : str, int, list or tuple
        Predictor column(s), given either as names or as integer indices into ``tframe.names``.
        Whether indices are used is decided from the first element only.
      y : str, int or None
        Response column, as a name or as an index into ``tframe.names``. None means no response.
      tframe
        Training frame; its ``names`` attribute is used to resolve column indices.
      vframe
        Validation frame, or None.
      kwargs : dict
        Build parameters. This dict is updated in place with ``training_frame``,
        ``validation_frame``, ``response_column``, ``ignored_columns`` and ``interactions``
        before a converted copy is posted. ``offset_column``, ``fold_column``,
        ``weights_column`` and ``interactions`` are optional.

    Returns
    -------
      None. When ``self._future`` is truthy the job is stored in ``self._job`` and the method
      returns without polling; otherwise the job is polled and ``self._resolve_model`` is called.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise fail on the list
    # concatenation below, and an empty sequence must not be indexed.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    # Columns with a special role are never ignored; a role that is not configured is None.
    special_columns = [
        y,
        kwargs.get("offset_column"),
        kwargs.get("fold_column"),
        kwargs.get("weights_column"),
    ]
    ignored_columns = list(set(tframe.names) - set(x + special_columns))
    if ignored_columns:
        kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns]
    else:
        kwargs["ignored_columns"] = None

    # The "interactions" key is always present in the request, quoted when provided.
    interactions = kwargs.get("interactions")
    if interactions is None:
        kwargs["interactions"] = None
    else:
        kwargs["interactions"] = [h2o.h2o._quoted(col) for col in interactions]

    # Convert every parameter value through the estimator's frame-to-key helper.
    kwargs = dict((k, H2OEstimator._keyify_if_H2OFrame(v)) for k, v in kwargs.items())
    algo = self._compute_algo()

    model = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo, **kwargs),
                   job_type=(algo + " Model Build"))

    if self._future:
        self._job = model
        return

    model.poll()
    if '_rest_version' in kwargs:
        model_json = H2OConnection.get_json("Models/" + model.dest_key,
                                            _rest_version=kwargs['_rest_version'])["models"][0]
    else:
        model_json = H2OConnection.get_json("Models/" + model.dest_key)["models"][0]
    self._resolve_model(model.dest_key, model_json)
def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Post a grid-build request and, unless running asynchronously, resolve the finished grid.

    Parameters
    ----------
      x : str, int, list or tuple
        Predictor column(s), given either as names or as integer indices into ``tframe.names``.
        Whether indices are used is decided from the first element only.
      y : str, int or None
        Response column, as a name or as an index into ``tframe.names``. None means no response.
      tframe
        Training frame; its ``names`` attribute is used to resolve column indices.
      vframe
        Validation frame, or None.
      kwargs : dict
        Build parameters. This dict is updated in place with ``training_frame``,
        ``validation_frame``, ``response_column`` and ``ignored_columns`` before a filtered copy
        is posted. ``offset_column``, ``fold_column`` and ``weights_column`` are optional.

    Returns
    -------
      None. When ``self._future`` is truthy the job is stored in ``self._job`` and the method
      returns without polling. Otherwise the job is polled, any reported failures are printed,
      ``self.models`` is filled and ``self._resolve_grid`` is called.

    Raises
    ------
      ValueError
        If the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise fail on the list
    # concatenation below, and an empty sequence must not be indexed.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    # Columns with a special role are never ignored; a role that is not configured is None.
    special_columns = [
        y,
        kwargs.get("offset_column"),
        kwargs.get("fold_column"),
        kwargs.get("weights_column"),
    ]
    ignored_columns = list(set(tframe.names) - set(x + special_columns))
    if ignored_columns:
        kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns]
    else:
        kwargs["ignored_columns"] = None

    # Drop unset parameters and send frames by their frame_id.
    kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v)
                  for k, v in kwargs.items() if v is not None)
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))

    if self._future:
        self._job = grid
        return

    grid.poll()

    # "_rest_version" was set just above, so it is always present here.
    rest_version = kwargs['_rest_version']
    grid_json = H2OConnection.get_json("Grids/" + grid.dest_key, _rest_version=rest_version)

    failure_details = grid_json["failure_details"]
    if failure_details:
        print("Errors/Warnings building gridsearch model\n")
        failed_params = grid_json["failed_params"]
        stack_traces = grid_json["failure_stack_traces"]
        for error_index, error_message in enumerate(failure_details):
            # The per-failure lists are not assumed to be as long as failure_details.
            params = failed_params[error_index] if error_index < len(failed_params) else None
            if isinstance(params, dict):
                for h_name in grid_json['hyper_names']:
                    print("Hyper-parameter: {0}, {1}".format(h_name, params[h_name]))
            if error_index < len(stack_traces):
                print("failure_details: {0}\nfailure_stack_traces: "
                      "{1}\n".format(error_message, stack_traces[error_index]))
            else:
                # Still report the failure when no stack trace accompanies it.
                print("failure_details: {0}\n".format(error_message))

    model_ids = grid_json['model_ids']
    self.models = [h2o.get_model(key['name']) for key in model_ids]

    # The first model returned by the grid search determines the model class
    # (binomial, multinomial, etc). Sometimes no model is returned due to bad
    # parameter values provided by the user.
    if len(model_ids) > 0:
        first_model_json = H2OConnection.get_json("Models/" + model_ids[0]['name'],
                                                  _rest_version=rest_version)['models'][0]
        self._resolve_grid(grid.dest_key, grid_json, first_model_json)
    else:
        raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")
def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Post a grid-build request and, unless running asynchronously, resolve the finished grid.

    Parameters
    ----------
      x : str, int, list or tuple
        Predictor column(s), given either as names or as integer indices into ``tframe.names``.
        Whether indices are used is decided from the first element only.
      y : str, int or None
        Response column, as a name or as an index into ``tframe.names``. None means no response.
      tframe
        Training frame; its ``names`` attribute is used to resolve column indices.
      vframe
        Validation frame, or None.
      kwargs : dict
        Build parameters. This dict is updated in place with ``training_frame``,
        ``validation_frame``, ``response_column`` and ``ignored_columns`` before a filtered copy
        is posted. ``offset_column``, ``fold_column`` and ``weights_column`` are optional.

    Returns
    -------
      None. When ``self._future`` is truthy the job is stored in ``self._job`` and the method
      returns without polling. Otherwise the job is polled, reported failure messages are
      printed, ``self.models`` is filled and ``self._resolve_grid`` is called.

    Raises
    ------
      ValueError
        If the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise fail on the list
    # concatenation below, and an empty sequence must not be indexed.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    # Columns with a special role are never ignored; a role that is not configured is None.
    special_columns = [
        y,
        kwargs.get("offset_column"),
        kwargs.get("fold_column"),
        kwargs.get("weights_column"),
    ]
    ignored_columns = list(set(tframe.names) - set(x + special_columns))
    if ignored_columns:
        kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns]
    else:
        kwargs["ignored_columns"] = None

    # Drop unset parameters and send frames by their frame_id.
    kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v)
                  for k, v in kwargs.items() if v is not None)
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))

    if self._future:
        self._job = grid
        return

    grid.poll()

    # "_rest_version" was set just above, so it is always present here.
    rest_version = kwargs['_rest_version']
    grid_json = H2OConnection.get_json("Grids/" + grid.dest_key, _rest_version=rest_version)
    for error_message in grid_json["failure_details"]:
        print(error_message)

    model_ids = grid_json['model_ids']
    self.models = [h2o.get_model(key['name']) for key in model_ids]

    # The first model returned by the grid search determines the model class
    # (binomial, multinomial, etc). Without any model there is nothing to resolve,
    # so fail clearly instead of raising IndexError on model_ids[0].
    if not model_ids:
        raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")
    first_model_json = H2OConnection.get_json("Models/" + model_ids[0]['name'],
                                              _rest_version=rest_version)['models'][0]
    self._resolve_grid(grid.dest_key, grid_json, first_model_json)
def getGLMRegularizationPath(model):
    """
    Fetch the regularization path of a GLM model from the "GetGLMRegPath" endpoint.

    @param model - model whose ``_model_json['model_id']['name']`` identifies it to the backend
    @return dict with keys 'lambdas', 'explained_deviance_train', 'explained_deviance_valid' and
            'coefficients' (one name -> value dict per entry), plus 'coefficients_std' when the
            response contains it
    """
    model_key = model._model_json['model_id']['name']
    reg_path = H2OConnection.get_json("GetGLMRegPath", model=model_key)
    names = reg_path.pop('coefficient_names')

    def by_name(rows):
        # Pair every row of values with the coefficient names.
        return [dict(zip(names, row)) for row in rows]

    result = {}
    for key in ('lambdas', 'explained_deviance_train', 'explained_deviance_valid'):
        result[key] = reg_path[key]
    result['coefficients'] = by_name(reg_path['coefficients'])
    if 'coefficients_std' in reg_path:
        result['coefficients_std'] = by_name(reg_path['coefficients_std'])
    return result
def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Post a grid-build request and, unless running asynchronously, resolve the finished grid.

    Parameters
    ----------
      x : str, int, list or tuple
        Predictor column(s), given either as names or as integer indices into ``tframe.names``.
        Whether indices are used is decided from the first element only.
      y : str, int or None
        Response column, as a name or as an index into ``tframe.names``. None means no response.
      tframe
        Training frame; its ``names`` attribute is used to resolve column indices.
      vframe
        Validation frame, or None.
      kwargs : dict
        Build parameters. This dict is updated in place with ``training_frame``,
        ``validation_frame``, ``response_column`` and ``ignored_columns`` before a filtered copy
        is posted. ``offset_column``, ``fold_column`` and ``weights_column`` are optional.

    Returns
    -------
      None. When ``self._future`` is truthy the job is stored in ``self._job`` and the method
      returns without polling. Otherwise the job is polled, reported failure messages are
      printed, ``self.models`` is filled and ``self._resolve_grid`` is called.

    Raises
    ------
      ValueError
        If the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise fail on the list
    # concatenation below, and an empty sequence must not be indexed.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    # Columns with a special role are never ignored; a role that is not configured is None.
    offset = kwargs.get("offset_column")
    folds = kwargs.get("fold_column")
    weights = kwargs.get("weights_column")
    ignored_columns = list(set(tframe.names) - set(x + [y, offset, folds, weights]))
    if ignored_columns:
        kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns]
    else:
        kwargs["ignored_columns"] = None

    # Drop unset parameters and send frames by their frame_id.
    kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v)
                  for k, v in kwargs.items() if v is not None)
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))

    if self._future:
        self._job = grid
        return

    grid.poll()

    # "_rest_version" was set just above, so it is always present here.
    rest_version = kwargs['_rest_version']
    grid_json = H2OConnection.get_json("Grids/" + grid.dest_key, _rest_version=rest_version)
    for error_message in grid_json["failure_details"]:
        print(error_message)

    model_ids = grid_json['model_ids']
    self.models = [h2o.get_model(key['name']) for key in model_ids]

    # The first model returned by the grid search determines the model class
    # (binomial, multinomial, etc). Without any model there is nothing to resolve,
    # so fail clearly instead of raising IndexError on model_ids[0].
    if not model_ids:
        raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")
    first_model_json = H2OConnection.get_json("Models/" + model_ids[0]['name'],
                                              _rest_version=rest_version)['models'][0]
    self._resolve_grid(grid.dest_key, grid_json, first_model_json)
def getGLMRegularizationPath(model):
    """
    Return the regularization path that the backend reports for a glm model.

    @param model - source lambda search model
    @return dict holding the lambdas, the train/valid explained deviances and the coefficients
            keyed by coefficient name (standardized coefficients too, when reported)
    """
    payload = H2OConnection.get_json("GetGLMRegPath", model=model._model_json["model_id"]["name"])
    coef_names = payload.pop("coefficient_names")

    path = {}
    path["lambdas"] = payload["lambdas"]
    path["explained_deviance_train"] = payload["explained_deviance_train"]
    path["explained_deviance_valid"] = payload["explained_deviance_valid"]

    # One {name: value} mapping per row of coefficients.
    named = []
    for values in payload["coefficients"]:
        named.append(dict(zip(coef_names, values)))
    path["coefficients"] = named

    # Standardized coefficients are only present in some responses.
    if "coefficients_std" in payload:
        named_std = []
        for values in payload["coefficients_std"]:
            named_std.append(dict(zip(coef_names, values)))
        path["coefficients_std"] = named_std
    return path
def getGLMRegularizationPath(model):
    """
    Collect the regularization path returned by the "GetGLMRegPath" endpoint for a glm model.

    @param model - source lambda search model
    @return dict with "lambdas", "explained_deviance_train", "explained_deviance_valid",
            "coefficients" and, if the response has them, "coefficients_std"; coefficient
            entries are dicts keyed by coefficient name
    """
    model_name = model._model_json["model_id"]["name"]
    raw = H2OConnection.get_json("GetGLMRegPath", model=model_name)
    labels = raw.pop("coefficient_names")

    def attach_labels(values):
        return dict(zip(labels, values))

    lambdas = raw["lambdas"]
    deviance_train = raw["explained_deviance_train"]
    deviance_valid = raw["explained_deviance_valid"]
    coefficients = [attach_labels(values) for values in raw["coefficients"]]

    out = {
        "lambdas": lambdas,
        "explained_deviance_train": deviance_train,
        "explained_deviance_valid": deviance_valid,
        "coefficients": coefficients,
    }
    if "coefficients_std" not in raw:
        return out
    out["coefficients_std"] = [attach_labels(values) for values in raw["coefficients_std"]]
    return out
def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Post a grid-build request and, unless running asynchronously, resolve the finished grid.

    Parameters
    ----------
      x : str, int, list or tuple
        Predictor column(s), given either as names or as integer indices into ``tframe.names``.
        Whether indices are used is decided from the first element only.
      y : str, int or None
        Response column, as a name or as an index into ``tframe.names``. None means no response.
      tframe
        Training frame; its ``names`` attribute is used to resolve column indices.
      vframe
        Validation frame, or None.
      kwargs : dict
        Build parameters. This dict is updated in place with ``training_frame``,
        ``validation_frame``, ``response_column`` and ``ignored_columns`` before a filtered copy
        is posted. ``offset_column``, ``fold_column`` and ``weights_column`` are optional.

    Returns
    -------
      None. When ``self._future`` is truthy the job is stored in ``self._job`` and the method
      returns without polling. Otherwise the job is polled, any reported failures are printed,
      ``self.models`` is filled and ``self._resolve_grid`` is called.

    Raises
    ------
      ValueError
        If the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise fail on the list
    # concatenation below, and an empty sequence must not be indexed.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    # Columns with a special role are never ignored; a role that is not configured is None.
    offset = kwargs.get("offset_column")
    folds = kwargs.get("fold_column")
    weights = kwargs.get("weights_column")
    ignored_columns = list(set(tframe.names) - set(x + [y, offset, folds, weights]))
    if ignored_columns:
        kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns]
    else:
        kwargs["ignored_columns"] = None

    # Drop unset parameters and send frames by their frame_id.
    kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v)
                  for k, v in kwargs.items() if v is not None)
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))

    if self._future:
        self._job = grid
        return

    grid.poll()

    # "_rest_version" was set just above, so it is always present here.
    rest_version = kwargs['_rest_version']
    grid_json = H2OConnection.get_json("Grids/" + grid.dest_key, _rest_version=rest_version)

    failure_details = grid_json["failure_details"]
    if failure_details:
        print("Errors/Warnings building gridsearch model\n")
        failed_params = grid_json["failed_params"]
        stack_traces = grid_json["failure_stack_traces"]
        for error_index, error_message in enumerate(failure_details):
            # Neither per-failure list is assumed to be as long as failure_details;
            # indexing them blindly raised IndexError while reporting an error.
            params = failed_params[error_index] if error_index < len(failed_params) else None
            if isinstance(params, dict):
                for h_name in grid_json['hyper_names']:
                    print("Hyper-parameter: {0}, {1}".format(h_name, params[h_name]))
            if error_index < len(stack_traces):
                print("failure_details: {0}\nfailure_stack_traces: "
                      "{1}\n".format(error_message, stack_traces[error_index]))
            else:
                # Still report the failure when no stack trace accompanies it.
                print("failure_details: {0}\n".format(error_message))

    model_ids = grid_json['model_ids']
    self.models = [h2o.get_model(key['name']) for key in model_ids]

    # The first model returned by the grid search determines the model class
    # (binomial, multinomial, etc). Sometimes no model is returned due to bad
    # parameter values provided by the user.
    if len(model_ids) > 0:
        first_model_json = H2OConnection.get_json("Models/" + model_ids[0]['name'],
                                                  _rest_version=rest_version)['models'][0]
        self._resolve_grid(grid.dest_key, grid_json, first_model_json)
    else:
        raise ValueError(
            "Gridsearch returns no model due to bad parameter values or other reasons...."
        )