Exemplo n.º 1
0
def test_table():
    """Test functionality of the H2OTwoDimTable class.

    Builds two tables and displays them, then checks column access on the
    second table by header name, by positive/negative index and by a list of
    keys. Finally checks that rows of unequal length are rejected with
    ``H2OTypeError``.
    """
    tbl1 = H2OTwoDimTable(cell_values=[[1, 2, 3], [10, 20, 30]],
                          col_header=list("ABC"))
    tbl1.show()

    print()
    tbl2 = H2OTwoDimTable(cell_values=[[1, 2, 4]] * 10,
                          col_header=["q1", "q2", "q3"],
                          row_header=range(10),
                          table_header="Table 2")
    tbl2.show()

    # Access a single column by its header name.
    assert tbl2["q1"] == [1] * 10
    assert tbl2["q2"] == [2] * 10
    assert tbl2["q3"] == [4] * 10
    # Access a single column by position (negative indices count from the end).
    assert tbl2[0] == [1] * 10
    assert tbl2[-1] == [4] * 10
    # A list of keys yields a list of columns.
    assert tbl2[[0, 1]] == [[1] * 10, [2] * 10]
    assert tbl2[["q3"]] == [[4] * 10]

    # Ragged cell_values must be rejected. Without the `else` branch this
    # check would also pass when the constructor silently accepts the input.
    try:
        H2OTwoDimTable(cell_values=[[1, 2, 3, 4], [1, 2, 3]])
    except H2OTypeError:
        pass
    else:
        raise AssertionError(
            "H2OTwoDimTable accepted rows of unequal length; expected H2OTypeError")
def glrm_catagorical_bug_fix():
    """Check GLRM archetypes on the categorical prostate data against stored reference values.

    Uploads ``smalldata/prostate/prostate_cat.csv``, trains an
    ``H2OGeneralizedLowRankEstimator`` with ``k=5``, ``recover_svd=True`` and
    ``seed=1234`` on all columns, and asserts that the archetype table of the
    resulting model matches the hard-coded reference table within a tolerance
    of ``1e-4``.
    """
    print("Importing prostate.csv data...")

    # H2OTwoDimTable containing the correct archetype values run before Wendy optimized memory for GLRM
    cell_values = [
        [
            'Arch1', 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0,
            58.295918367346935, 8.810102040816325, 11.344897959183678,
            6.285714285714286,
        ],
        [
            'Arch2', 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0,
            69.35514018691589, 7.538224299065424, 10.087757009345797,
            5.6168224299065415,
        ],
        [
            'Arch3', 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0,
            64.68, 75.892, 10.812000000000001, 7.44,
        ],
        [
            'Arch4', 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0,
            68.77083333333333, 13.368750000000002, 49.44583333333334, 5.9375,
        ],
        [
            'Arch5', 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0,
            69.04901960784314, 16.140196078431373, 11.510000000000005,
            7.235294117647059,
        ],
    ]
    col_header = [
        'dprosboth', 'dprosleft', 'dprosnone', 'dprosright', 'raceblack',
        'racena', 'racewhite', 'capsuleno', 'capsuleyes', 'dcapsno',
        'dcapsyes', 'age', 'psa', 'vol', 'gleason'
    ]
    row_header = ['Arch1', 'Arch2', 'Arch3', 'Arch4', 'Arch5']
    table_header = "archetypes"
    correct_archetype = H2OTwoDimTable(cell_values=cell_values,
                                       col_header=col_header,
                                       row_header=row_header,
                                       table_header=table_header)

    prostateF = h2o.upload_file(
        pyunit_utils.locate("smalldata/prostate/prostate_cat.csv"))

    # Fixed seed so the trained archetypes are comparable with the reference.
    glrm_h2o = H2OGeneralizedLowRankEstimator(k=5, recover_svd=True, seed=1234)
    glrm_h2o.train(x=prostateF.names, training_frame=prostateF)
    glrm_h2o.show()

    assert pyunit_utils.equal_2d_tables(glrm_h2o._model_json["output"]["archetypes"]._cell_values,
                                        correct_archetype._cell_values, tolerance=1e-4), \
        "GLRM model archetypes generated from current model are not correct."
Exemplo n.º 3
0
 def __new__(cls, keyvals):
     """
     Build the object that represents one decoded JSON object of a server response.

     This method is called by the simplejson.json(object_pairs_hook=<this>).
     `keyvals` is a list of (key, value) tuples. For example:
        [("schema_version", 3), ("schema_name", "InitIDV3"), ("schema_type", "Iced")]

     The schema name is taken from the first "__meta" entry whose value is a dict (its
     "schema_name" item) or the first "__schema" entry whose value is a string, whichever
     comes first. Known schema names are handed to the `make` factory of the matching
     class; anything else is constructed through the parent class of H2OResponse.
     """
     schema = None
     for key, value in keyvals:
         if key == "__meta" and isinstance(value, dict):
             schema = value["schema_name"]
         elif key == "__schema" and is_type(value, str):
             schema = value
         else:
             continue
         break

     # Ordered (schema name, class getter) pairs. The getters defer the lookup of each
     # class name until its schema actually matches.
     handlers = (
         ("MetadataV3", lambda: H2OMetadataV3),
         ("CloudV3", lambda: H2OCluster),
         ("H2OErrorV3", lambda: H2OErrorV3),
         ("H2OModelBuilderErrorV3", lambda: H2OModelBuilderErrorV3),
         ("TwoDimTableV3", lambda: H2OTwoDimTable),
         ("ModelMetricsRegressionV3", lambda: H2ORegressionModelMetrics),
         ("ModelMetricsClusteringV3", lambda: H2OClusteringModelMetrics),
         ("ModelMetricsBinomialV3", lambda: H2OBinomialModelMetrics),
         ("ModelMetricsBinomialUpliftV3", lambda: H2OBinomialUpliftModelMetrics),
         ("ModelMetricsMultinomialV3", lambda: H2OMultinomialModelMetrics),
         ("ModelMetricsOrdinalV3", lambda: H2OOrdinalModelMetrics),
         ("ModelMetricsAutoEncoderV3", lambda: H2OAutoEncoderModelMetrics),
     )
     for known_schema, get_handler in handlers:
         if schema == known_schema:
             return get_handler().make(keyvals)
     return super(H2OResponse, cls).__new__(cls, keyvals)
Exemplo n.º 4
0
    def sort_by(self, metric, increasing=True):
        """
        Rank the models of the grid by a metric and return the ranking as a table.

        Parameters
        ----------
        metric: str
          Name of a metric method of this grid ('logloss', 'auc', 'r2'), optionally written as a call
          with arguments, for example 'logloss(valid=True)'. The text is spliced into an expression that
          is run through ``eval``, so it must never come from an untrusted source.
        increasing: boolean, optional
          Order the rows by increasing (True, the default) or decreasing (False) metric value.

        Returns
        -------
          An H2OTwoDimTable with one row per model: model id, hyperparameters, and metric value.

        Examples
        --------
          >>> grid_search_results = gs.sort_by('F1', False)
          >>> best_model_id = grid_search_results['Model Id'][0]
          >>> best_model = h2o.get_model(best_model_id)
          >>> best_model.predict(test_data)
        """
        # A bare metric name becomes a call without arguments.
        if metric[-1] != ')':
            metric = metric + '()'

        # NOTE: `metric` is executed as Python code here; only pass trusted strings.
        scores = eval('self.' + metric + '.items()')
        ranked = sorted(scores, key=lambda pair: pair[1])

        # Turn the ranked (model id, value) pairs into columns and slot the hyperparameters in between.
        columns = list(map(list, zip(*ranked)))
        model_ids = columns[0]
        columns.insert(1, [self.get_hyperparams(mid, display=False) for mid in model_ids])
        if not increasing:
            for column in columns:
                column.reverse()

        # Drop an empty "()" so that the column title shows just the metric name.
        if metric[-2] == '(':
            metric = metric[:-2]
        header = ['Model Id', 'Hyperparameters: [' + ', '.join(list(self.hyper_params.keys())) + ']', metric]
        title = 'Grid Search Results for ' + self.model.__class__.__name__
        rows = [list(row) for row in zip(*columns)]
        return H2OTwoDimTable(col_header=header, table_header=title, cell_values=rows)
Exemplo n.º 5
0
    def __init__(self, cm, domains=None, table_header=None):
        """
        Build the printable confusion-matrix table and store it in ``self.table``.

        Row ``i`` of the table holds the ``i``-th entry of every inner list of ``cm``, followed by
        the error rate (rounded to ``self.ROUND`` digits, "nan" when the row sums to zero) and an
        " (errors/total)" string, where errors are the off-diagonal entries of that row. A final
        "Total" row holds the sum of each inner list plus the overall error rate and string.

        :param cm: non-empty list of indexable rows of counts; a list of length 2 is transposed first.
        :param domains: optional list of class labels used as row and column headers. When None,
            the indices "0" .. "n-1" are used instead.
        :param table_header: optional table title, "Confusion Matrix (Act/Pred)" by default.
        :raises ValueError: if ``cm`` is None/empty or is not a list.
        """
        if not cm:
            raise ValueError("Missing data, `cm` is None or empty")
        if not isinstance(cm, list):
            # str() is required here: concatenating the type object itself raises TypeError
            # and would mask the intended ValueError.
            raise ValueError("`cm` is not a list. Got: " + str(type(cm)))

        if len(cm) == 2:
            cm = list(zip(*cm))  # transpose if 2x2
        nclass = len(cm)
        totals = [sum(c) for c in cm]

        class_sums = []
        cell_values = []  # every row is a fresh list, no shared row objects
        total_errs = 0
        for i in range(nclass):
            column = [v[i] for v in cm]
            class_sum = sum(column)
            # everything except the diagonal entry counts as an error
            errs = sum(column[:i] + column[(i + 1):])
            total_errs += errs
            err_string = " (" + str(errs) + "/" + str(class_sum) + ")"
            err_rate = float("nan") if class_sum == 0 else round(errs / class_sum, self.ROUND)
            cell_values.append(column + [str(err_rate), err_string])
            class_sums.append(class_sum)

        # the last row of cell_values ... the "totals" row
        grand_total = sum(class_sums)
        total_rate = float("nan") if grand_total == 0 else round(total_errs / grand_total, self.ROUND)
        total_string = " (" + str(total_errs) + "/" + str(grand_total) + ")"
        cell_values.append(totals + [str(total_rate), total_string])

        if table_header is None:
            table_header = "Confusion Matrix (Act/Pred)"
        col_header = [""]  # no column label for the "rows" column
        if domains is not None:
            import copy
            # copies, so that the caller's list is not extended below
            row_header = copy.deepcopy(domains)
            col_header += copy.deepcopy(domains)
        else:
            row_header = [str(i) for i in range(nclass)]
            col_header += [str(i) for i in range(nclass)]

        row_header += ["Total"]
        col_header += ["Error", "Rate"]

        # the first cell of every row repeats the row label
        for i, label in enumerate(row_header):
            cell_values[i].insert(0, label)

        self.table = H2OTwoDimTable(row_header=row_header,
                                    col_header=col_header,
                                    table_header=table_header,
                                    cell_values=cell_values)
Exemplo n.º 6
0
 def get_status(self):
     """
     Returns H2OTwoDimTable with current cluster status information.

     The table consists of a single row: the values produced by
     ``_get_cluster_status_info_values()``, under the column headers listed in
     ``_cluster_status_info_keys``.
     """
     status_row = self._get_cluster_status_info_values()
     return H2OTwoDimTable(cell_values=[status_row],
                           col_header=_cluster_status_info_keys)
Exemplo n.º 7
0
 def show(self):
   """
   Print the grid.

   When the grid holds models, the table returned by ``sorted_metric_table()`` is printed.
   Otherwise a table is printed that numbers every combination of the hyperparameter values,
   starting at 1.
   """
   combos = itertools.product(*list(self.hyper_params.values()))
   if self.models:
     print(self.sorted_metric_table())
     return

   # No models yet: list the hyperparameter combinations of the search space.
   rows = [[number, list(combo)] for number, combo in enumerate(combos, 1)]
   header = ['Model', 'Hyperparameters: [' + ', '.join(list(self.hyper_params.keys()))+']']
   title = 'Grid Search of Model ' + self.model.__class__.__name__
   print(H2OTwoDimTable(col_header=header, table_header=title, cell_values=rows))
Exemplo n.º 8
0
    def sort_by(self, metric, increasing=True):
        """
        Deprecated since 2016-12-12, use grid.get_grid() instead.

        Returns an H2OTwoDimTable with the columns model id, hyperparameters and metric value,
        ordered by the metric value (increasing by default). `metric` is the name of a metric
        method of this grid, optionally with call arguments such as 'logloss(valid=True)'; it is
        run through ``eval`` and therefore must come from a trusted source.
        """
        if metric[-1] != ')':
            metric += '()'

        # NOTE: executes `metric` as Python code; never feed it untrusted input.
        by_score = sorted(eval('self.' + metric + '.items()'), key=lambda item: item[1])

        # Columns: model ids, then (inserted) hyperparameters, then metric values.
        table_columns = [list(column) for column in zip(*by_score)]
        hyperparams = [self.get_hyperparams(model_id, display=False) for model_id in table_columns[0]]
        table_columns[1:1] = [hyperparams]
        if not increasing:
            table_columns = [column[::-1] for column in table_columns]

        # An argument-less call is shown without its parentheses in the header.
        if metric[-2] == '(':
            metric = metric[:-2]
        hyper_names = ', '.join(list(self.hyper_params.keys()))
        return H2OTwoDimTable(
            col_header=['Model Id', 'Hyperparameters: [' + hyper_names + ']', metric],
            table_header='Grid Search Results for ' + self.model.__class__.__name__,
            cell_values=[list(row) for row in zip(*table_columns)])
Exemplo n.º 9
0
 def get_status_details(self):
     """
     Returns H2OTwoDimTable with detailed current status information about each node.

     The cached information is refreshed through "GET /3/Cloud" first when it is older than
     ``REFRESH_INTERVAL``. Each row starts with a "Node <n>" label (numbered from 1), followed by
     the node's values for the keys in ``_cluster_status_detailed_info_keys``.
     """
     expires_at = self._retrieved_at + self.REFRESH_INTERVAL
     if expires_at < time.time():
         # Cached info is stale: fetch it again before building the table.
         fresh_info = h2o.api("GET /3/Cloud")
         self._fill_from_h2ocluster(fresh_info)

     # Work on a copy, the module-level key list must not grow with every call.
     detail_keys = _cluster_status_detailed_info_keys[:]
     rows = []
     for number, node in enumerate(self.nodes, 1):
         row = ["Node %d" % number]
         row.extend(node[key] for key in detail_keys)
         rows.append(row)
     detail_keys.insert(0, "node")

     # NOTE(review): row_header receives the column keys rather than per-row labels;
     # confirm that this is what H2OTwoDimTable expects.
     return H2OTwoDimTable(cell_values=rows,
                           col_header=detail_keys,
                           row_header=detail_keys)
Exemplo n.º 10
0
 def __new__(cls, keyvals):
     """
     Build the object that represents one decoded JSON object of a server response.

     This method is called by the simplejson.json(object_pairs_hook=<this>).
     `keyvals` is a list of (key, value) tuples. For example:
        [("schema_version", 3), ("schema_name", "InitIDV3"), ("schema_type", "Iced")]

     The schema name comes from the first "__meta" entry holding a dict (its "schema_name" item)
     or the first "__schema" entry holding a string, whichever appears first. Recognized schemas
     are turned into their dedicated classes; everything else is constructed through the parent
     class of H2OResponse.
     """
     schema = None
     for key, value in keyvals:
         if key == "__meta" and isinstance(value, dict):
             schema = value["schema_name"]
         elif key == "__schema" and is_str(value):
             schema = value
         else:
             continue
         break

     # Cluster and error schemas are built by calling the class directly ...
     if schema == "CloudV3":
         return H2OCluster(keyvals)
     if schema == "H2OErrorV3":
         return H2OErrorV3(keyvals)
     if schema == "H2OModelBuilderErrorV3":
         return H2OModelBuilderErrorV3(keyvals)

     # ... while tables and model metrics go through their `make` factory.
     if schema == "TwoDimTableV3":
         return H2OTwoDimTable.make(keyvals)
     if schema == "ModelMetricsRegressionV3":
         return H2ORegressionModelMetrics.make(keyvals)
     if schema == "ModelMetricsClusteringV3":
         return H2OClusteringModelMetrics.make(keyvals)
     if schema == "ModelMetricsBinomialV3":
         return H2OBinomialModelMetrics.make(keyvals)
     if schema == "ModelMetricsMultinomialV3":
         return H2OMultinomialModelMetrics.make(keyvals)
     if schema == "ModelMetricsAutoEncoderV3":
         return H2OAutoEncoderModelMetrics.make(keyvals)

     return super(H2OResponse, cls).__new__(cls, keyvals)