def model_performance(self, test_data=None, train=False, valid=False):
    """
    Generate model metrics for this model on test_data.

    :param test_data: Data set for which model metrics shall be computed against.
                      Both train and valid arguments are ignored if test_data is not None.
    :param train: Report the training metrics for the model. If the test_data is the
                  training data, the training metrics are returned.
    :param valid: Report the validation metrics for the model. If train and valid are
                  both True (or both False), the training metrics are reported.
    :return: An object of class H2OModelMetrics.
    :raises ValueError: If test_data is given but is not an H2OFrame.
    """
    if test_data is None:
        # No frame supplied: serve the metrics stored in the model JSON.
        # Training metrics are the default and take priority over validation.
        if train or not valid:
            return self._model_json["output"]["training_metrics"]
        return self._model_json["output"]["validation_metrics"]

    # cases dealing with test_data not None
    if not isinstance(test_data, H2OFrame):
        # str() is required here: concatenating a str with a type object raises
        # TypeError and would hide the intended ValueError.
        raise ValueError("`test_data` must be of type H2OFrame. Got: " + str(type(test_data)))

    fr_key = H2OFrame.send_frame(test_data)
    res = H2OConnection.post_json("ModelMetrics/models/" + self._key + "/frames/" + fr_key)
    h2o.removeFrameShallow(fr_key)

    # FIXME need to do the client-side filtering... PUBDEV-874: https://0xdata.atlassian.net/browse/PUBDEV-874
    # Keep the first metrics entry computed on our temporary frame. This stays
    # None when nothing matches, and None is then handed to the metrics class.
    raw_metrics = next((mm for mm in res["model_metrics"] if mm["frame"]["name"] == fr_key), None)
    return self._metrics_class(raw_metrics, algo=self._model_json["algo"])
def predict(self, test_data):
    """
    Score a dataset with this model.

    :param test_data: Data to be predicted on.
    :return: A new H2OFrame filled with predictions.
    :raises ValueError: If test_data is falsy.
    """
    if not test_data:
        raise ValueError("Must specify test data")

    # Send the test data to the cluster; the result is a temporary frame key.
    tmp_key = H2OFrame.send_frame(test_data)

    # Request the predictions (the original author notes this call blocks),
    # then drop the temporary test frame.
    endpoint = "Predictions/models/" + self._key + "/frames/" + tmp_key
    response = H2OConnection.post_json(endpoint)
    h2o.removeFrameShallow(tmp_key)

    # Look up the prediction frame named in the response, fetch its metadata,
    # then shallow-remove the frame itself.
    pred_key = response["model_metrics"][0]["predictions"]["frame_id"]["name"]
    meta = h2o.frame(pred_key)["frames"][0]
    h2o.removeFrameShallow(pred_key)

    # Pair each column label with its vec id and wrap them as H2OVec objects.
    ids = meta["vec_ids"]
    n_rows = meta["rows"]
    labels = [column["label"] for column in meta["columns"]]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(labels, ids), n_rows))
def predict(self, test_data):
    """
    Run this model over a dataset and return the predictions.

    :param test_data: Data to be predicted on.
    :return: A new H2OFrame filled with predictions.
    :raises ValueError: If test_data is falsy.
    """
    if not test_data:
        raise ValueError("Must specify test data")

    # Send the test data to the cluster under a temporary key.
    frame_key = H2OFrame.send_frame(test_data)

    # Score the frame (noted as a blocking call by the original author) and
    # discard the temporary frame once the response is in hand.
    url = "Predictions/models/" + self._key + "/frames/" + frame_key
    reply = H2OConnection.post_json(url)
    h2o.removeFrameShallow(frame_key)

    # The response names the prediction frame; read its metadata and then
    # shallow-remove that frame as well.
    result_key = reply["predictions_frame"]["name"]
    frame_meta = h2o.frame(result_key)["frames"][0]
    h2o.removeFrameShallow(result_key)

    # Build H2OVec objects from (label, vec_id) pairs and the row count.
    vec_ids = frame_meta["vec_ids"]
    row_count = frame_meta["rows"]
    names = [c["label"] for c in frame_meta["columns"]]
    new_vecs = H2OVec.new_vecs(zip(names, vec_ids), row_count)
    return H2OFrame(vecs=new_vecs)
def deepfeatures(self, test_data, layer):
    """
    Return hidden layer details.

    :param test_data: Data to create a feature space on
    :param layer: 0 index hidden layer
    :return: A new H2OFrame built from the vecs of the frame named in the response.
    :raises ValueError: If test_data is falsy.
    """
    if not test_data:
        raise ValueError("Must specify test data")

    # Send the test data to the cluster under a temporary key.
    tmp_key = H2OFrame.send_frame(test_data)

    # Same endpoint as a prediction request, with the hidden layer index
    # passed as `deep_features_hidden_layer`.
    endpoint = "Predictions/models/" + self._key + "/frames/" + tmp_key
    response = H2OConnection.post_json(endpoint, deep_features_hidden_layer=layer)

    # Retrieve the metadata of the frame named in the response.
    features_key = response["predictions_frame"]["name"]
    meta = h2o.frame(features_key)["frames"][0]

    # Create vecs from the vec ids, the row count and the column labels.
    ids = meta["vec_ids"]
    n_rows = meta["rows"]
    labels = [column["label"] for column in meta["columns"]]
    feature_vecs = H2OVec.new_vecs(zip(labels, ids), n_rows)

    # Remove the temporary test frame only after the vecs have been built.
    h2o.removeFrameShallow(tmp_key)
    return H2OFrame(vecs=feature_vecs)
def _model_build(x,y,validation_x,validation_y,algo_url,kwargs): # Basic sanity checking if algo_url == "autoencoder": if "autoencoder" in kwargs.keys(): if kwargs["autoencoder"]: if y: raise ValueError("`y` should not be specified for autoencoder, remove `y` input.") algo_url="deeplearning" if not x: raise ValueError("Missing features") x = _check_frame(x,y,y) if validation_x: validation_x = _check_frame(validation_x,validation_y,y) # Send frame descriptions to H2O cluster train_key = x.send_frame() kwargs['training_frame']=train_key if validation_x is not None: valid_key = validation_x.send_frame() kwargs['validation_frame']=valid_key if y: kwargs['response_column']=y._name kwargs = dict([(k, kwargs[k]) for k in kwargs if kwargs[k] is not None]) # launch the job and poll job = H2OJob(H2OConnection.post_json("ModelBuilders/"+algo_url, **kwargs), job_type=(algo_url+" Model Build")).poll() model_json = H2OConnection.get_json("Models/"+job.dest_key)["models"][0] model_type = model_json["output"]["model_category"] if model_type=="Binomial": from model.binomial import H2OBinomialModel model = H2OBinomialModel(job.dest_key,model_json) elif model_type=="Clustering": from model.clustering import H2OClusteringModel model = H2OClusteringModel(job.dest_key,model_json) elif model_type=="Regression": from model.regression import H2ORegressionModel model = H2ORegressionModel(job.dest_key,model_json) elif model_type=="Multinomial": from model.multinomial import H2OMultinomialModel model = H2OMultinomialModel(job.dest_key,model_json) elif model_type=="AutoEncoder": from model.autoencoder import H2OAutoEncoderModel model = H2OAutoEncoderModel(job.dest_key,model_json) elif model_type=="DimReduction": from model.dim_reduction import H2ODimReductionModel model = H2ODimReductionModel(job.dest_key,model_json) else: print model_type raise NotImplementedError # Cleanup h2o.removeFrameShallow(train_key) if validation_x: h2o.removeFrameShallow(valid_key) return model
def _model_build(x, y, validation_x, validation_y, algo_url, kwargs): # Basic sanity checking if algo_url == "autoencoder": if "autoencoder" in kwargs.keys(): if kwargs["autoencoder"]: if y: raise ValueError( "`y` should not be specified for autoencoder, remove `y` input." ) algo_url = "deeplearning" if not x: raise ValueError("Missing features") x = _check_frame(x, y, y) if validation_x: validation_x = _check_frame(validation_x, validation_y, y) # Send frame descriptions to H2O cluster train_key = x.send_frame() kwargs['training_frame'] = train_key if validation_x is not None: valid_key = validation_x.send_frame() kwargs['validation_frame'] = valid_key if y: kwargs['response_column'] = y._name kwargs = dict([(k, kwargs[k]) for k in kwargs if kwargs[k] is not None]) # launch the job and poll job = H2OJob(H2OConnection.post_json("ModelBuilders/" + algo_url, **kwargs), job_type=(algo_url + " Model Build")).poll() model_json = H2OConnection.get_json("Models/" + job.dest_key)["models"][0] model_type = model_json["output"]["model_category"] if model_type == "Binomial": from model.binomial import H2OBinomialModel model = H2OBinomialModel(job.dest_key, model_json) elif model_type == "Clustering": from model.clustering import H2OClusteringModel model = H2OClusteringModel(job.dest_key, model_json) elif model_type == "Regression": from model.regression import H2ORegressionModel model = H2ORegressionModel(job.dest_key, model_json) elif model_type == "Multinomial": from model.multinomial import H2OMultinomialModel model = H2OMultinomialModel(job.dest_key, model_json) elif model_type == "AutoEncoder": from model.autoencoder import H2OAutoEncoderModel model = H2OAutoEncoderModel(job.dest_key, model_json) else: print model_type raise NotImplementedError # Cleanup h2o.removeFrameShallow(train_key) if validation_x: h2o.removeFrameShallow(valid_key) return model