示例#1
0
  def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Assemble the builder parameters, submit a "ModelBuilders/<algo>" job and resolve the built model.

      @param x - predictor column name(s) or integer index(es) into tframe.names; a scalar is wrapped in a list
      @param y - response column name, or an integer index into tframe.names; may be None
      @param tframe - training frame, stored under 'training_frame'
      @param vframe - validation frame, stored under 'validation_frame' when it is not None
      @param kwargs - builder parameters; must already hold the keys 'offset_column', 'fold_column'
                      and 'weights_column' (a missing key raises KeyError). The passed dict is
                      mutated before a converted copy is posted.

    Returns None. When self._future is truthy the job is stored on self._job and not polled;
    otherwise the job is polled and the model JSON is handed to self._resolve_model.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
      kwargs["validation_frame"] = vframe
    if isinstance(y, int):
      y = tframe.names[y]
    if y is not None:
      kwargs['response_column'] = y

    # Always work on a list: a tuple of column names could not be concatenated with a list below.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if isinstance(x[0], int):
      x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    # Every frame column that is neither a predictor, the response, nor a special column is ignored.
    used_columns = set(x).union([y, offset, folds, weights])
    ignored_columns = list(set(tframe.names) - used_columns)
    kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns] if ignored_columns else None

    interactions = kwargs.get("interactions")
    kwargs["interactions"] = None if interactions is None else [h2o.h2o._quoted(col) for col in interactions]

    # Each value goes through _keyify_if_H2OFrame (presumably swapping frames for their keys - confirm).
    kwargs = dict((k, H2OEstimator._keyify_if_H2OFrame(v)) for k, v in kwargs.items())
    algo = self._compute_algo()

    model = H2OJob(H2OConnection.post_json("ModelBuilders/"+algo, **kwargs), job_type=(algo+" Model Build"))

    if self._future:
      self._job = model
      return

    model.poll()
    if '_rest_version' in kwargs:
      model_json = H2OConnection.get_json("Models/"+model.dest_key, _rest_version=kwargs['_rest_version'])["models"][0]
    else:
      model_json = H2OConnection.get_json("Models/"+model.dest_key)["models"][0]
    self._resolve_model(model.dest_key, model_json)
示例#2
0
  def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Assemble the builder parameters, submit a "ModelBuilders/<algo>" job and resolve the built model.

      @param x - predictor column name(s) or integer index(es) into tframe.names; a scalar is wrapped in a list
      @param y - response column name, or an integer index into tframe.names; may be None
      @param tframe - training frame, stored under 'training_frame'
      @param vframe - validation frame, stored under 'validation_frame' when it is not None
      @param kwargs - builder parameters; must already hold the keys 'offset_column', 'fold_column'
                      and 'weights_column' (a missing key raises KeyError). The passed dict is
                      mutated before a converted copy is posted.

    Returns None. When self._future is truthy the job is stored on self._job and not polled;
    otherwise the job is polled and the model JSON is handed to self._resolve_model.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
      kwargs["validation_frame"] = vframe
    if isinstance(y, int):
      y = tframe.names[y]
    if y is not None:
      kwargs['response_column'] = y

    # Always work on a list: a tuple of column names could not be concatenated with a list below.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if isinstance(x[0], int):
      x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    # Every frame column that is neither a predictor, the response, nor a special column is ignored.
    used_columns = set(x).union([y, offset, folds, weights])
    ignored_columns = list(set(tframe.names) - used_columns)
    kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns] if ignored_columns else None

    # Each value goes through _keyify_if_H2OFrame (presumably swapping frames for their keys - confirm).
    kwargs = dict((k, H2OEstimator._keyify_if_H2OFrame(v)) for k, v in kwargs.items())
    algo = self._compute_algo()

    model = H2OJob(H2OConnection.post_json("ModelBuilders/"+algo, **kwargs), job_type=(algo+" Model Build"))

    if self._future:
      self._job = model
      return

    model.poll()
    if '_rest_version' in kwargs:
      model_json = H2OConnection.get_json("Models/"+model.dest_key, _rest_version=kwargs['_rest_version'])["models"][0]
    else:
      model_json = H2OConnection.get_json("Models/"+model.dest_key)["models"][0]
    self._resolve_model(model.dest_key, model_json)
示例#3
0
 def makeGLMModel(model, coefs, threshold=.5):
     """
     Build a GLM estimator from explicit coefficients through the "MakeGLMModel" endpoint.
       @param model - source model; its _model_json['model_id']['name'] is sent as 'model'
       @param coefs - dictionary whose keys are sent as 'names' and whose values are sent as 'beta'
       @param threshold - forwarded unchanged as 'threshold' (defaults to 0.5)
       @return a new H2OGeneralizedLinearEstimator resolved from the endpoint's JSON reply
     """
     source_key = model._model_json['model_id']['name']
     coef_names = list(coefs.keys())
     coef_values = list(coefs.values())
     reply = H2OConnection.post_json("MakeGLMModel", model=source_key, names=coef_names,
                                     beta=coef_values, threshold=threshold)
     glm = H2OGeneralizedLinearEstimator()
     glm._resolve_model(reply['model_id']['name'], reply)
     return glm
示例#4
0
  def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Assemble the grid parameters, submit a "Grid/<algo>" job and collect the models it built.

      @param x - predictor column name(s) or integer index(es) into tframe.names; a scalar is wrapped in a list
      @param y - response column name, or an integer index into tframe.names; may be None
      @param tframe - training frame, stored under 'training_frame'
      @param vframe - validation frame, stored under 'validation_frame' when it is not None
      @param kwargs - grid parameters; must already hold the keys 'offset_column', 'fold_column'
                      and 'weights_column' (a missing key raises KeyError). The passed dict is
                      mutated before a filtered copy is posted.

    Returns None. When self._future is truthy the job is stored on self._job and not polled.
    Otherwise any reported failures are printed, self.models is filled and self._resolve_grid is called.
    Raises ValueError when the finished grid lists no model ids.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
      kwargs["validation_frame"] = vframe
    if isinstance(y, int):
      y = tframe.names[y]
    if y is not None:
      kwargs['response_column'] = y

    # Always work on a list: a tuple of column names could not be concatenated with a list below.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if isinstance(x[0], int):
      x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    # Every frame column that is neither a predictor, the response, nor a special column is ignored.
    used_columns = set(x).union([y, offset, folds, weights])
    ignored_columns = list(set(tframe.names) - used_columns)
    kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns] if ignored_columns else None

    # Drop unset parameters and send frames by their frame_id.
    kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v) for k, v in kwargs.items() if v is not None)
    algo = self.model._compute_algo()  # unique to grid search
    rest_version = 99  # unique to grid search
    kwargs["_rest_version"] = rest_version
    if self.grid_id is not None:
      kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/"+algo, **kwargs), job_type=(algo+" Grid Build"))

    if self._future:
      self._job = grid
      return

    grid.poll()
    # '_rest_version' is always set above, so the versioned endpoint is the only one ever used.
    grid_json = H2OConnection.get_json("Grids/"+grid.dest_key, _rest_version=rest_version)

    failure_details = grid_json["failure_details"]
    if len(failure_details) > 0:
      print("Errors/Warnings building gridsearch model\n")

      for error_index, error_message in enumerate(failure_details):
        failed_params = grid_json["failed_params"][error_index]
        if isinstance(failed_params, dict):
          for h_name in grid_json['hyper_names']:
            print("Hyper-parameter: {0}, {1}".format(h_name, failed_params[h_name]))

        # The stack-trace list may be shorter than the failure list.
        if len(grid_json["failure_stack_traces"]) > error_index:
          print("failure_details: {0}\nfailure_stack_traces: "
                "{1}\n".format(error_message, grid_json['failure_stack_traces'][error_index]))

    model_ids = grid_json['model_ids']
    self.models = [h2o.get_model(key['name']) for key in model_ids]

    # The first model in the grid is fetched to get the model class (binomial, multinomial, etc).
    # Sometimes no model is returned due to bad parameter values provided by the user.
    if len(model_ids) == 0:
      raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")
    first_model_json = H2OConnection.get_json("Models/"+model_ids[0]['name'],
                                              _rest_version=rest_version)['models'][0]
    self._resolve_grid(grid.dest_key, grid_json, first_model_json)
示例#5
0
文件: glm.py 项目: DocOZ001/h2o-3
 def makeGLMModel(model, coefs, threshold=.5):
     """
     Create a custom GLM model from the given coefficients.
     A source model trained on the dataset must be supplied so that the dataset information can be taken from it.
       @param model - source model, used for extracting dataset information
       @param coefs - dictionary containing model coefficients (keys are sent as names, values as beta)
       @param threshold - (optional, only for binomial) decision threshold used for classification
       @return a new H2OGeneralizedLinearEstimator resolved from the "MakeGLMModel" reply
     """
     request = dict(
         model=model._model_json["model_id"]["name"],
         names=list(coefs.keys()),
         beta=list(coefs.values()),
         threshold=threshold,
     )
     reply = H2OConnection.post_json("MakeGLMModel", **request)
     estimator = H2OGeneralizedLinearEstimator()
     estimator._resolve_model(reply["model_id"]["name"], reply)
     return estimator
示例#6
0
    def _model_build(self, x, y, tframe, vframe, kwargs):
        """
        Assemble the grid parameters, submit a "Grid/<algo>" job and collect the models it built.

          @param x - predictor column name(s) or integer index(es) into tframe.names; a scalar is wrapped in a list
          @param y - response column name, or an integer index into tframe.names; may be None
          @param tframe - training frame, stored under 'training_frame'
          @param vframe - validation frame, stored under 'validation_frame' when it is not None
          @param kwargs - grid parameters; must already hold the keys 'offset_column', 'fold_column'
                          and 'weights_column' (a missing key raises KeyError). The passed dict is
                          mutated before a filtered copy is posted.

        Returns None. When self._future is truthy the job is stored on self._job and not polled.
        Otherwise each reported failure is printed, self.models is filled and self._resolve_grid is called.
        Raises ValueError when the finished grid lists no model ids.
        """
        kwargs['training_frame'] = tframe
        if vframe is not None:
            kwargs["validation_frame"] = vframe
        if isinstance(y, int):
            y = tframe.names[y]
        if y is not None:
            kwargs['response_column'] = y

        # Always work on a list: a tuple of column names could not be concatenated with a list below.
        x = list(x) if isinstance(x, (list, tuple)) else [x]
        if isinstance(x[0], int):
            x = [tframe.names[i] for i in x]

        offset = kwargs["offset_column"]
        folds = kwargs["fold_column"]
        weights = kwargs["weights_column"]
        # Every frame column that is neither a predictor, the response, nor a special column is ignored.
        used_columns = set(x).union([y, offset, folds, weights])
        ignored_columns = list(set(tframe.names) - used_columns)
        kwargs["ignored_columns"] = (
            [h2o.h2o._quoted(col) for col in ignored_columns] if ignored_columns else None)

        # Drop unset parameters and send frames by their frame_id.
        kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v)
                      for k, v in kwargs.items() if v is not None)
        algo = self.model._compute_algo()  # unique to grid search
        rest_version = 99  # unique to grid search
        kwargs["_rest_version"] = rest_version
        if self.grid_id is not None:
            kwargs["grid_id"] = self.grid_id

        grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                      job_type=(algo + " Grid Build"))

        if self._future:
            self._job = grid
            return

        grid.poll()
        # '_rest_version' is always set above, so the versioned endpoint is the only one ever used.
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key,
                                           _rest_version=rest_version)
        for error_message in grid_json["failure_details"]:
            print(error_message)

        model_ids = grid_json['model_ids']
        self.models = [h2o.get_model(key['name']) for key in model_ids]

        # The first model in the grid is fetched to get the model class (binomial, multinomial, etc).
        # A grid may finish without building any model; report that instead of failing on [0].
        if len(model_ids) == 0:
            raise ValueError(
                "Gridsearch returns no model due to bad parameter values or other reasons....")
        first_model_json = H2OConnection.get_json(
            "Models/" + model_ids[0]['name'],
            _rest_version=rest_version)['models'][0]

        self._resolve_grid(grid.dest_key, grid_json, first_model_json)
示例#7
0
 def makeGLMModel(model, coefs, threshold=.5):
     """
     Produce a GLM whose coefficients are supplied by the caller rather than fitted.
     The source model, already trained on the dataset, provides the dataset information.
       @param model - source model, used for extracting dataset information
       @param coefs - dictionary containing model coefficients
       @param threshold - (optional, only for binomial) decision threshold used for classification
       @return a new H2OGeneralizedLinearEstimator resolved from the "MakeGLMModel" reply
     """
     source_name = model._model_json["model_id"]["name"]
     names, betas = list(coefs.keys()), list(coefs.values())
     response = H2OConnection.post_json(
         "MakeGLMModel", model=source_name, names=names, beta=betas, threshold=threshold)
     new_key = response["model_id"]["name"]
     custom_glm = H2OGeneralizedLinearEstimator()
     custom_glm._resolve_model(new_key, response)
     return custom_glm
示例#8
0
  def _model_build(self, x, y, tframe, vframe, kwargs):
    """
    Assemble the grid parameters, submit a "Grid/<algo>" job and collect the models it built.

      @param x - predictor column name(s) or integer index(es) into tframe.names; a scalar is wrapped in a list
      @param y - response column name, or an integer index into tframe.names; may be None
      @param tframe - training frame, stored under 'training_frame'
      @param vframe - validation frame, stored under 'validation_frame' when it is not None
      @param kwargs - grid parameters; must already hold the keys 'offset_column', 'fold_column'
                      and 'weights_column' (a missing key raises KeyError). The passed dict is
                      mutated before a filtered copy is posted.

    Returns None. When self._future is truthy the job is stored on self._job and not polled.
    Otherwise each reported failure is printed, self.models is filled and self._resolve_grid is called.
    Raises ValueError when the finished grid lists no model ids.
    """
    kwargs['training_frame'] = tframe
    if vframe is not None:
      kwargs["validation_frame"] = vframe
    if isinstance(y, int):
      y = tframe.names[y]
    if y is not None:
      kwargs['response_column'] = y

    # Always work on a list: a tuple of column names could not be concatenated with a list below.
    x = list(x) if isinstance(x, (list, tuple)) else [x]
    if isinstance(x[0], int):
      x = [tframe.names[i] for i in x]

    offset = kwargs["offset_column"]
    folds = kwargs["fold_column"]
    weights = kwargs["weights_column"]
    # Every frame column that is neither a predictor, the response, nor a special column is ignored.
    used_columns = set(x).union([y, offset, folds, weights])
    ignored_columns = list(set(tframe.names) - used_columns)
    kwargs["ignored_columns"] = [h2o.h2o._quoted(col) for col in ignored_columns] if ignored_columns else None

    # Drop unset parameters and send frames by their frame_id.
    kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v) for k, v in kwargs.items() if v is not None)
    algo = self.model._compute_algo()  # unique to grid search
    rest_version = 99  # unique to grid search
    kwargs["_rest_version"] = rest_version
    if self.grid_id is not None:
      kwargs["grid_id"] = self.grid_id

    grid = H2OJob(H2OConnection.post_json("Grid/"+algo, **kwargs), job_type=(algo+" Grid Build"))

    if self._future:
      self._job = grid
      return

    grid.poll()
    # '_rest_version' is always set above, so the versioned endpoint is the only one ever used.
    grid_json = H2OConnection.get_json("Grids/"+grid.dest_key, _rest_version=rest_version)
    for error_message in grid_json["failure_details"]:
      print(error_message)

    model_ids = grid_json['model_ids']
    self.models = [h2o.get_model(key['name']) for key in model_ids]

    # The first model in the grid is fetched to get the model class (binomial, multinomial, etc).
    # A grid may finish without building any model; report that instead of failing on [0].
    if len(model_ids) == 0:
      raise ValueError("Gridsearch returns no model due to bad parameter values or other reasons....")
    first_model_json = H2OConnection.get_json("Models/"+model_ids[0]['name'], _rest_version=rest_version)['models'][0]

    self._resolve_grid(grid.dest_key, grid_json, first_model_json)
示例#9
0
    def _model_build(self, x, y, tframe, vframe, kwargs):
        """
        Assemble the grid parameters, submit a "Grid/<algo>" job and collect the models it built.

          @param x - predictor column name(s) or integer index(es) into tframe.names; a scalar is wrapped in a list
          @param y - response column name, or an integer index into tframe.names; may be None
          @param tframe - training frame, stored under 'training_frame'
          @param vframe - validation frame, stored under 'validation_frame' when it is not None
          @param kwargs - grid parameters; must already hold the keys 'offset_column', 'fold_column'
                          and 'weights_column' (a missing key raises KeyError). The passed dict is
                          mutated before a filtered copy is posted.

        Returns None. When self._future is truthy the job is stored on self._job and not polled.
        Otherwise any reported failures are printed, self.models is filled and self._resolve_grid is called.
        Raises ValueError when the finished grid lists no model ids.
        """
        kwargs['training_frame'] = tframe
        if vframe is not None:
            kwargs["validation_frame"] = vframe
        if isinstance(y, int):
            y = tframe.names[y]
        if y is not None:
            kwargs['response_column'] = y

        # Always work on a list: a tuple of column names could not be concatenated with a list below.
        x = list(x) if isinstance(x, (list, tuple)) else [x]
        if isinstance(x[0], int):
            x = [tframe.names[i] for i in x]

        offset = kwargs["offset_column"]
        folds = kwargs["fold_column"]
        weights = kwargs["weights_column"]
        # Every frame column that is neither a predictor, the response, nor a special column is ignored.
        used_columns = set(x).union([y, offset, folds, weights])
        ignored_columns = list(set(tframe.names) - used_columns)
        kwargs["ignored_columns"] = (
            [h2o.h2o._quoted(col) for col in ignored_columns] if ignored_columns else None)

        # Drop unset parameters and send frames by their frame_id.
        kwargs = dict((k, v.frame_id if isinstance(v, H2OFrame) else v)
                      for k, v in kwargs.items() if v is not None)
        algo = self.model._compute_algo()  # unique to grid search
        rest_version = 99  # unique to grid search
        kwargs["_rest_version"] = rest_version
        if self.grid_id is not None:
            kwargs["grid_id"] = self.grid_id

        grid = H2OJob(H2OConnection.post_json("Grid/" + algo, **kwargs),
                      job_type=(algo + " Grid Build"))

        if self._future:
            self._job = grid
            return

        grid.poll()
        # '_rest_version' is always set above, so the versioned endpoint is the only one ever used.
        grid_json = H2OConnection.get_json("Grids/" + grid.dest_key,
                                           _rest_version=rest_version)

        failure_details = grid_json["failure_details"]
        if len(failure_details) > 0:
            print("Errors/Warnings building gridsearch model\n")

            for error_index, error_message in enumerate(failure_details):
                failed_params = grid_json["failed_params"][error_index]
                if isinstance(failed_params, dict):
                    for h_name in grid_json['hyper_names']:
                        print("Hyper-parameter: {0}, {1}".format(
                            h_name, failed_params[h_name]))

                # The stack-trace list may be shorter than the failure list; do not index past it.
                if len(grid_json["failure_stack_traces"]) > error_index:
                    print("failure_details: {0}\nfailure_stack_traces: "
                          "{1}\n".format(
                              error_message,
                              grid_json['failure_stack_traces'][error_index]))

        model_ids = grid_json['model_ids']
        self.models = [h2o.get_model(key['name']) for key in model_ids]

        # The first model in the grid is fetched to get the model class (binomial, multinomial, etc).
        # Sometimes no model is returned due to bad parameter values provided by the user.
        if len(model_ids) == 0:
            raise ValueError(
                "Gridsearch returns no model due to bad parameter values or other reasons...."
            )
        first_model_json = H2OConnection.get_json(
            "Models/" + model_ids[0]['name'],
            _rest_version=rest_version)['models'][0]
        self._resolve_grid(grid.dest_key, grid_json, first_model_json)
示例#10
0
文件: glm.py 项目: Avighan/h2o-3
 def makeGLMModel(model, coefs, threshold=.5):
   """
   Post the given coefficients to "MakeGLMModel" and wrap the reply in a GLM estimator.
     @param model - source model; its _model_json['model_id']['name'] is sent as 'model'
     @param coefs - dictionary whose keys are sent as 'names' and whose values are sent as 'beta'
     @param threshold - forwarded unchanged as 'threshold' (defaults to 0.5)
     @return a new H2OGeneralizedLinearEstimator resolved from the endpoint's JSON reply
   """
   params = {
     "model": model._model_json['model_id']['name'],
     "names": list(coefs.keys()),
     "beta": list(coefs.values()),
     "threshold": threshold,
   }
   made_json = H2OConnection.post_json("MakeGLMModel", **params)
   result = H2OGeneralizedLinearEstimator()
   result._resolve_model(made_json['model_id']['name'], made_json)
   return result