def _model_build(self, x, y, tframe, vframe, kwargs):
    """Submit a model-build job for this estimator and load the finished model.

    :param x: predictor column(s): a single name or index, or a list/tuple of
        names or of integer indices into ``tframe.names``.
    :param y: response column name, or integer index into ``tframe.names``;
        may be None.
    :param tframe: training frame; stored under ``kwargs['training_frame']``.
    :param vframe: validation frame, or None to omit it.
    :param kwargs: builder parameters. Must contain the keys ``offset_column``,
        ``fold_column`` and ``weights_column``. The dict passed in is mutated
        before being replaced by a converted copy.
    :returns: None. When ``self._future`` is truthy the job is stored in
        ``self._job`` and the method returns without waiting for it.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise make the
    # ``x + [...]`` concatenation below raise TypeError.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    # Guard against an empty selection before peeking at the first element.
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    # Every training-frame column that is not a predictor, the response or one
    # of the special columns is reported as ignored.
    ignored_columns = list(set(tframe.names) - set(x + [y, offset, folds, weights]))
    kwargs["ignored_columns"] = None if ignored_columns == [] else [
        h2o.h2o._quoted(col) for col in ignored_columns]
    interactions = kwargs.get("interactions")
    kwargs["interactions"] = None if interactions is None else [
        h2o.h2o._quoted(col) for col in interactions]
    # Every value goes through _keyify_if_H2OFrame (presumably turns frames
    # into their keys -- helper is defined elsewhere).
    kwargs = {k: H2OEstimator._keyify_if_H2OFrame(v) for k, v in kwargs.items()}
    algo = self._compute_algo()

    model = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo, **kwargs),
                   job_type=(algo + " Model Build"))
    if self._future:
        self._job = model
        return

    model.poll()
    if '_rest_version' in kwargs:
        response = H2OConnection.get_json("Models/" + model.dest_key,
                                          _rest_version=kwargs['_rest_version'])
    else:
        response = H2OConnection.get_json("Models/" + model.dest_key)
    model_json = response["models"][0]
    self._resolve_model(model.dest_key, model_json)
def _model_build(self, x, y, tframe, vframe, kwargs):
    """Submit a model-build job for this estimator and load the finished model.

    :param x: predictor column(s): a single name or index, or a list/tuple of
        names or of integer indices into ``tframe.names``.
    :param y: response column name, or integer index into ``tframe.names``;
        may be None.
    :param tframe: training frame; stored under ``kwargs['training_frame']``.
    :param vframe: validation frame, or None to omit it.
    :param kwargs: builder parameters. Must contain the keys ``offset_column``,
        ``fold_column`` and ``weights_column``. The dict passed in is mutated
        before being replaced by a converted copy.
    :returns: None. When ``self._future`` is truthy the job is stored in
        ``self._job`` and the method returns without waiting for it.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Normalise to a list so that a tuple of column names does not break the
    # list concatenation used to compute the ignored columns.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    # An empty selection has no first element to inspect.
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    # Columns of the training frame that play no role in the model.
    used_columns = set(x + [y, offset, folds, weights])
    ignored_columns = list(set(tframe.names) - used_columns)
    kwargs["ignored_columns"] = None if ignored_columns == [] else [
        h2o.h2o._quoted(col) for col in ignored_columns]
    # Every value goes through _keyify_if_H2OFrame (presumably turns frames
    # into their keys -- helper is defined elsewhere).
    kwargs = {k: H2OEstimator._keyify_if_H2OFrame(v) for k, v in kwargs.items()}
    algo = self._compute_algo()

    model = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo, **kwargs),
                   job_type=(algo + " Model Build"))
    if self._future:
        self._job = model
        return

    model.poll()
    if '_rest_version' in kwargs:
        response = H2OConnection.get_json("Models/" + model.dest_key,
                                          _rest_version=kwargs['_rest_version'])
    else:
        response = H2OConnection.get_json("Models/" + model.dest_key)
    model_json = response["models"][0]
    self._resolve_model(model.dest_key, model_json)
def makeGLMModel(model, coefs, threshold=.5):
    """
    Build a GLM model from explicitly supplied coefficients.

    @param model - source model; its ``_model_json['model_id']['name']`` is sent with the request
    @param coefs - dictionary whose keys are sent as ``names`` and whose values are sent as ``beta``
    @param threshold - value forwarded as the ``threshold`` request parameter (default .5)
    @return a new H2OGeneralizedLinearEstimator resolved from the "MakeGLMModel" response
    """
    source_id = model._model_json['model_id']['name']
    coef_names = list(coefs.keys())
    coef_values = list(coefs.values())
    model_json = H2OConnection.post_json("MakeGLMModel",
                                         model=source_id,
                                         names=coef_names,
                                         beta=coef_values,
                                         threshold=threshold)
    estimator = H2OGeneralizedLinearEstimator()
    new_id = model_json['model_id']['name']
    estimator._resolve_model(new_id, model_json)
    return estimator
def _model_build(self, x, y, tframe, vframe, kwargs):
    """Submit a grid-search job, report failed parameter combinations and load the grid.

    :param x: predictor column(s): a single name or index, or a list/tuple of
        names or of integer indices into ``tframe.names``.
    :param y: response column name, or integer index into ``tframe.names``;
        may be None.
    :param tframe: training frame; stored under ``kwargs['training_frame']``.
    :param vframe: validation frame, or None to omit it.
    :param kwargs: builder parameters. Must contain the keys ``offset_column``,
        ``fold_column`` and ``weights_column``. The dict passed in is mutated
        before being replaced by a filtered copy.
    :returns: None. When ``self._future`` is truthy the job is stored in
        ``self._job`` and the method returns without waiting for it.
    :raises ValueError: if the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise make the
    # ``x + [...]`` concatenation below raise TypeError.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    # Guard against an empty selection before peeking at the first element.
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    ignored_columns = list(set(tframe.names) - set(x + [y, offset, folds, weights]))
    kwargs["ignored_columns"] = None if ignored_columns == [] else [
        h2o.h2o._quoted(col) for col in ignored_columns]
    # Drop parameters that are None and send frames by their frame_id.
    kwargs = {k: (v.frame_id if isinstance(v, H2OFrame) else v)
              for k, v in kwargs.items() if v is not None}
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))
    if self._future:
        self._job = grid
        return

    grid.poll()
    if '_rest_version' in kwargs:
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key,
                                           _rest_version=kwargs['_rest_version'])
        failure_details = grid_json["failure_details"]
        if len(failure_details) > 0:
            print("Errors/Warnings building gridsearch model\n")
        for error_index, error_message in enumerate(failure_details):
            failed_params = grid_json["failed_params"][error_index]
            if isinstance(failed_params, dict):
                for h_name in grid_json['hyper_names']:
                    print("Hyper-parameter: {0}, {1}".format(h_name, failed_params[h_name]))
            # A stack trace is only printed when one exists for this failure.
            if len(grid_json["failure_stack_traces"]) > error_index:
                print("failure_details: {0}\nfailure_stack_traces: "
                      "{1}\n".format(error_message,
                                     grid_json['failure_stack_traces'][error_index]))
    else:
        # "_rest_version" is always set above, so this branch is not expected to run.
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key)

    self.models = [h2o.get_model(key['name']) for key in grid_json['model_ids']]

    # The first model of the grid is fetched to get the model class
    # (binomial, multinomial, etc). Sometimes no model is returned due to bad
    # parameter values provided by the user.
    if len(grid_json['model_ids']) > 0:
        first_model_json = H2OConnection.get_json(
            "Models/" + grid_json['model_ids'][0]['name'],
            _rest_version=kwargs['_rest_version'])['models'][0]
        self._resolve_grid(grid.dest_key, grid_json, first_model_json)
    else:
        raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")
def makeGLMModel(model, coefs, threshold=.5):
    """
    Build a custom GLM model from the supplied coefficients. A source model trained on the
    dataset must be passed in so that the dataset information can be taken from it.

    @param model - source model, used for extracting dataset information
    @param coefs - dictionary containing model coefficients (keys are sent as names, values as beta)
    @param threshold - (optional, only for binomial) decision threshold used for classification
    """
    request = dict(model=model._model_json["model_id"]["name"],
                   names=list(coefs.keys()),
                   beta=list(coefs.values()),
                   threshold=threshold)
    model_json = H2OConnection.post_json("MakeGLMModel", **request)
    glm = H2OGeneralizedLinearEstimator()
    glm._resolve_model(model_json["model_id"]["name"], model_json)
    return glm
def _model_build(self, x, y, tframe, vframe, kwargs):
    """Submit a grid-search job, print any reported failures and load the grid.

    :param x: predictor column(s): a single name or index, or a list/tuple of
        names or of integer indices into ``tframe.names``.
    :param y: response column name, or integer index into ``tframe.names``;
        may be None.
    :param tframe: training frame; stored under ``kwargs['training_frame']``.
    :param vframe: validation frame, or None to omit it.
    :param kwargs: builder parameters. Must contain the keys ``offset_column``,
        ``fold_column`` and ``weights_column``. The dict passed in is mutated
        before being replaced by a filtered copy.
    :returns: None. When ``self._future`` is truthy the job is stored in
        ``self._job`` and the method returns without waiting for it.
    :raises ValueError: if the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Normalise to a list so that a tuple of column names does not break the
    # list concatenation used to compute the ignored columns.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    # An empty selection has no first element to inspect.
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    ignored_columns = list(
        set(tframe.names) - set(x + [y, offset, folds, weights]))
    kwargs["ignored_columns"] = None if ignored_columns == [] else [
        h2o.h2o._quoted(col) for col in ignored_columns
    ]
    # Drop parameters that are None and send frames by their frame_id.
    kwargs = {
        k: (v.frame_id if isinstance(v, H2OFrame) else v)
        for k, v in kwargs.items() if v is not None
    }
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))
    if self._future:
        self._job = grid
        return

    grid.poll()
    if '_rest_version' in kwargs:
        grid_json = H2OConnection.get_json(
            "Grids/" + grid.dest_key, _rest_version=kwargs['_rest_version'])
        for error_message in grid_json["failure_details"]:
            print(error_message)
    else:
        # "_rest_version" is always set above, so this branch is not expected to run.
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key)

    self.models = [
        h2o.get_model(key['name']) for key in grid_json['model_ids']
    ]

    # The first model of the grid is fetched to get the model class
    # (binomial, multinomial, etc). Fail with an explicit error instead of a
    # bare IndexError when the grid built no model at all.
    if len(grid_json['model_ids']) == 0:
        raise ValueError(
            "Gridsearch returns no model due to bad parameter values or other reasons...."
        )
    first_model_json = H2OConnection.get_json(
        "Models/" + grid_json['model_ids'][0]['name'],
        _rest_version=kwargs['_rest_version'])['models'][0]
    self._resolve_grid(grid.dest_key, grid_json, first_model_json)
def makeGLMModel(model, coefs, threshold=.5):
    """
    Construct a custom GLM model out of the given coefficients. The source model, trained on
    the dataset, is required because the dataset information is extracted from it.

    @param model - source model, used for extracting dataset information
    @param coefs - dictionary containing model coefficients
    @param threshold - (optional, only for binomial) decision threshold used for classification
    """
    source_name = model._model_json["model_id"]["name"]
    names = list(coefs.keys())
    beta = list(coefs.values())
    model_json = H2OConnection.post_json(
        "MakeGLMModel",
        model=source_name,
        names=names,
        beta=beta,
        threshold=threshold,
    )
    result = H2OGeneralizedLinearEstimator()
    result_name = model_json["model_id"]["name"]
    result._resolve_model(result_name, model_json)
    return result
def _model_build(self, x, y, tframe, vframe, kwargs):
    """Submit a grid-search job, print any reported failures and load the grid.

    :param x: predictor column(s): a single name or index, or a list/tuple of
        names or of integer indices into ``tframe.names``.
    :param y: response column name, or integer index into ``tframe.names``;
        may be None.
    :param tframe: training frame; stored under ``kwargs['training_frame']``.
    :param vframe: validation frame, or None to omit it.
    :param kwargs: builder parameters. Must contain the keys ``offset_column``,
        ``fold_column`` and ``weights_column``. The dict passed in is mutated
        before being replaced by a filtered copy.
    :returns: None. When ``self._future`` is truthy the job is stored in
        ``self._job`` and the method returns without waiting for it.
    :raises ValueError: if the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Always work on a list: a tuple of names would otherwise make the
    # ``x + [...]`` concatenation below raise TypeError.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    # Guard against an empty selection before peeking at the first element.
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    used_columns = set(x + [y, offset, folds, weights])
    ignored_columns = list(set(tframe.names) - used_columns)
    kwargs["ignored_columns"] = None if ignored_columns == [] else [
        h2o.h2o._quoted(col) for col in ignored_columns]
    # Drop parameters that are None and send frames by their frame_id.
    kwargs = {k: (v.frame_id if isinstance(v, H2OFrame) else v)
              for k, v in kwargs.items() if v is not None}
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))
    if self._future:
        self._job = grid
        return

    grid.poll()
    if '_rest_version' in kwargs:
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key,
                                           _rest_version=kwargs['_rest_version'])
        for error_message in grid_json["failure_details"]:
            print(error_message)
    else:
        # "_rest_version" is always set above, so this branch is not expected to run.
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key)

    model_ids = grid_json['model_ids']
    self.models = [h2o.get_model(key['name']) for key in model_ids]

    # The first model of the grid is fetched to get the model class
    # (binomial, multinomial, etc). Fail with an explicit error instead of a
    # bare IndexError when the grid built no model at all.
    if len(model_ids) == 0:
        raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")
    first_model_json = H2OConnection.get_json("Models/" + model_ids[0]['name'],
                                              _rest_version=kwargs['_rest_version'])['models'][0]
    self._resolve_grid(grid.dest_key, grid_json, first_model_json)
def _model_build(self, x, y, tframe, vframe, kwargs):
    """Submit a grid-search job, report failed parameter combinations and load the grid.

    :param x: predictor column(s): a single name or index, or a list/tuple of
        names or of integer indices into ``tframe.names``.
    :param y: response column name, or integer index into ``tframe.names``;
        may be None.
    :param tframe: training frame; stored under ``kwargs['training_frame']``.
    :param vframe: validation frame, or None to omit it.
    :param kwargs: builder parameters. Must contain the keys ``offset_column``,
        ``fold_column`` and ``weights_column``. The dict passed in is mutated
        before being replaced by a filtered copy.
    :returns: None. When ``self._future`` is truthy the job is stored in
        ``self._job`` and the method returns without waiting for it.
    :raises ValueError: if the finished grid contains no models.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
        kwargs["validation_frame"] = vframe
    if isinstance(y, int):
        y = tframe.names[y]
    if y is not None:
        kwargs['response_column'] = y

    # Normalise to a list so that a tuple of column names does not break the
    # list concatenation used to compute the ignored columns.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    # An empty selection has no first element to inspect.
    if x and isinstance(x[0], int):
        x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    ignored_columns = list(
        set(tframe.names) - set(x + [y, offset, folds, weights]))
    kwargs["ignored_columns"] = None if ignored_columns == [] else [
        h2o.h2o._quoted(col) for col in ignored_columns
    ]
    # Drop parameters that are None and send frames by their frame_id.
    kwargs = {
        k: (v.frame_id if isinstance(v, H2OFrame) else v)
        for k, v in kwargs.items() if v is not None
    }
    algo = self.model._compute_algo()  # unique to grid search
    kwargs["_rest_version"] = 99  # unique to grid search
    if self.grid_id is not None:
        kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                  job_type=(algo + " Grid Build"))
    if self._future:
        self._job = grid
        return

    grid.poll()
    if '_rest_version' in kwargs:
        grid_json = H2OConnection.get_json(
            "Grids/" + grid.dest_key, _rest_version=kwargs['_rest_version'])
        failure_details = grid_json["failure_details"]
        if len(failure_details) > 0:
            print("Errors/Warnings building gridsearch model\n")
        for error_index, error_message in enumerate(failure_details):
            failed_params = grid_json["failed_params"][error_index]
            if isinstance(failed_params, dict):
                for h_name in grid_json['hyper_names']:
                    print("Hyper-parameter: {0}, {1}".format(
                        h_name, failed_params[h_name]))
            # Not every failure is guaranteed to come with a stack trace;
            # indexing past the end of the list would raise IndexError.
            if len(grid_json["failure_stack_traces"]) > error_index:
                print("failure_details: {0}\nfailure_stack_traces: "
                      "{1}\n".format(
                          error_message,
                          grid_json['failure_stack_traces'][error_index]))
    else:
        # "_rest_version" is always set above, so this branch is not expected to run.
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key)

    self.models = [
        h2o.get_model(key['name']) for key in grid_json['model_ids']
    ]

    # The first model of the grid is fetched to get the model class
    # (binomial, multinomial, etc). Sometimes no model is returned due to bad
    # parameter values provided by the user.
    if len(grid_json['model_ids']) > 0:
        first_model_json = H2OConnection.get_json(
            "Models/" + grid_json['model_ids'][0]['name'],
            _rest_version=kwargs['_rest_version'])['models'][0]
        self._resolve_grid(grid.dest_key, grid_json, first_model_json)
    else:
        raise ValueError(
            "Gridsearch returns no model due to bad parameter values or other reasons...."
        )
def makeGLMModel(model, coefs, threshold=.5):
    """
    Create a GLM model from explicitly given coefficients.

    @param model - source model; its ``_model_json['model_id']['name']`` identifies it in the request
    @param coefs - dictionary of coefficients; keys are sent as ``names``, values as ``beta``
    @param threshold - value forwarded as the ``threshold`` request parameter (default .5)
    @return a new H2OGeneralizedLinearEstimator resolved from the "MakeGLMModel" response
    """
    payload = {
        "model": model._model_json['model_id']['name'],
        "names": list(coefs.keys()),
        "beta": list(coefs.values()),
        "threshold": threshold,
    }
    model_json = H2OConnection.post_json("MakeGLMModel", **payload)
    m = H2OGeneralizedLinearEstimator()
    m._resolve_model(model_json['model_id']['name'], model_json)
    return m