def _arg_to_expr(arg): if arg is not None and isinstance(arg, range): arg = list(arg) if arg is None: return "[]" # empty list elif isinstance(arg, ExprNode): return arg._do_it(False) elif isinstance(arg, ASTId): return str(arg) elif isinstance(arg, bool): return "{}".format("TRUE" if arg else "FALSE") elif is_numeric(arg): return "{}".format("NaN" if math.isnan(arg) else arg) elif is_str(arg): return '"' + arg + '"' elif isinstance(arg, slice): return "[{}:{}]".format(0 if arg.start is None else arg.start, "NaN" if (arg.stop is None or math.isnan(arg.stop)) else ( arg.stop) if arg.start is None else (arg.stop - arg.start)) elif isinstance(arg, list): allstrs = all(is_str(elem) for elem in arg) if allstrs: return "[%s]" % " ".join('"%s"' % elem for elem in arg) else: return "[%s]" % " ".join("NaN" if i == 'NaN' or math.isnan(i) else str(i) for i in arg) raise ValueError("Unexpected arg type: " + str(type(arg)) + " " + str(arg.__class__) + " " + arg.__repr__())
def _arg_to_expr(arg): if arg is not None and isinstance(arg, range): arg = list(arg) if arg is None: return "[]" # empty list elif isinstance(arg, ExprNode): return arg._do_it(False) elif isinstance(arg, ASTId): return str(arg) elif isinstance(arg, bool): return "{}".format("TRUE" if arg else "FALSE") elif is_numeric(arg): return "{}".format("NaN" if math.isnan(arg) else arg) elif is_str(arg): return '"' + arg + '"' elif isinstance(arg, slice): return "[{}:{}]".format( 0 if arg.start is None else arg.start, "NaN" if (arg.stop is None or math.isnan(arg.stop)) else (arg.stop) if arg.start is None else (arg.stop - arg.start)) elif isinstance(arg, list): allstrs = all(is_str(elem) for elem in arg) if allstrs: return "[%s]" % " ".join('"%s"' % elem for elem in arg) else: return "[%s]" % " ".join( "NaN" if i == 'NaN' or math.isnan(i) else str(i) for i in arg) raise ValueError("Unexpected arg type: " + str(type(arg)) + " " + str(arg.__class__) + " " + arg.__repr__())
def _handle_python_dicts(python_obj): header = list(python_obj.keys()) is_valid = all([ re.match(r'^[a-zA-Z_][a-zA-Z0-9_.]*$', col) for col in header ]) # is this a valid header? if not is_valid: raise ValueError( "Did not get a valid set of column names! Must match the regular expression: ^[a-zA-Z_][a-zA-Z0-9_.]*$ " ) for k in python_obj: # check that each value entry is a flat list/tuple or single int, float, or string v = python_obj[k] if isinstance( v, (tuple, list)): # if value is a tuple/list, then it must be flat if _is_list_of_lists(v): raise ValueError("Values in the dictionary must be flattened!") elif is_numeric(v) or is_str(v): python_obj[k] = [v] else: raise ValueError( "Encountered invalid dictionary value when constructing H2OFrame. Got: {0}" .format(v)) rows = list(map(list, itertools.zip_longest(*list(python_obj.values())))) data_to_write = [dict(list(zip(header, row))) for row in rows] return header, data_to_write
def _handle_python_dicts(python_obj): header = list(python_obj.keys()) is_valid = all([re.match(r'^[a-zA-Z_][a-zA-Z0-9_.]*$', col) for col in header]) # is this a valid header? if not is_valid: raise ValueError( "Did not get a valid set of column names! Must match the regular expression: ^[a-zA-Z_][a-zA-Z0-9_.]*$ ") for k in python_obj: # check that each value entry is a flat list/tuple or single int, float, or string v = python_obj[k] if isinstance(v, (tuple, list)): # if value is a tuple/list, then it must be flat if _is_list_of_lists(v): raise ValueError("Values in the dictionary must be flattened!") elif is_numeric(v) or is_str(v): python_obj[k] = [v] else: raise ValueError("Encountered invalid dictionary value when constructing H2OFrame. Got: {0}".format(v)) rows = list(map(list, itertools.zip_longest(*list(python_obj.values())))) data_to_write = [dict(list(zip(header, row))) for row in rows] return header, data_to_write
def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics and/or thresholds.

    When neither ``metrics`` nor ``thresholds`` is given, the threshold returned by
    ``find_threshold_by_max_metric("f1")`` is used.

    :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision",
        "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}. One matrix is
        produced per metric, at the threshold returned by ``find_threshold_by_max_metric`` for it.
    :param thresholds: A value (or list of values) between 0 and 1 (inclusive). One matrix is produced per
        threshold. A list passed here is not modified.
    :return: a list of ConfusionMatrix objects (if there are more than one to return), or a single
        ConfusionMatrix (if there is only one). Matrices for ``thresholds`` come first, then those for
        ``metrics``, each in the order given.
    :raises ValueError: if a threshold is not a number in [0, 1], or a metric is not one of the allowed names.
    """
    allowed_metrics = ("min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity",
                       "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy")

    if metrics is None and thresholds is None:
        metrics = ["f1"]

    # Normalize both arguments to fresh lists so the caller's lists are never mutated.
    if metrics is None:
        metrics_list = []
    elif isinstance(metrics, list):
        metrics_list = list(metrics)
    else:
        metrics_list = [metrics]

    if thresholds is None:
        thresholds_list = []
    elif isinstance(thresholds, list):
        thresholds_list = list(thresholds)
    else:
        thresholds_list = [thresholds]

    # A threshold must satisfy BOTH bounds (the previous ``t >= 0 or t <= 1`` accepted every number).
    if not all(is_numeric(t) and 0 <= t <= 1 for t in thresholds_list):
        raise ValueError("All thresholds must be numbers between 0 and 1 (inclusive).")
    if not all(m in allowed_metrics for m in metrics_list):
        # The message is built from the same tuple the check uses, so the two cannot drift apart.
        raise ValueError("The only allowable metrics are " + ", ".join(allowed_metrics))

    # Pair each threshold with the metric it came from (None for user-supplied thresholds). Keeping
    # the pairing explicit labels each matrix correctly even when two thresholds happen to be equal.
    requests = [(t, None) for t in thresholds_list]
    requests.extend((self.find_threshold_by_max_metric(m), m) for m in metrics_list)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    cms = []
    for t, metric in requests:
        idx = self.find_idx_by_threshold(t)
        row = thresh2d.cell_values[idx]
        # Column 0 is read as the threshold; columns 11-14 as the tns, fns, fps and tps counts.
        actual_threshold = float(row[0])
        tns, fns, fps, tps = row[11], row[12], row[13], row[14]
        if metric is None:
            table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_threshold)
        else:
            table_header = "Confusion Matrix (Act/Pred) for max " + metric + " @ threshold = " + str(
                actual_threshold)
        # Rows are [tns, fps] and [fns, tps]; the former (tns + fps) - fps and (tps + fns) - tps
        # reduce to tns and fns.
        cms.append(ConfusionMatrix(cm=[[tns, fps], [fns, tps]],
                                   domains=self._metric_json['domain'],
                                   table_header=table_header))

    if len(cms) == 1:
        return cms[0]
    return cms
def _is_num_list(l): return isinstance(l, (tuple, list)) and all(is_numeric(i) for i in l)
def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics and/or thresholds.

    If both ``metrics`` and ``thresholds`` are omitted, the threshold returned by
    ``find_threshold_by_max_metric("f1")`` is used.

    :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision",
        "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}. Each metric
        yields one matrix, taken at the threshold ``find_threshold_by_max_metric`` returns for it.
    :param thresholds: A value (or list of values) between 0 and 1 (inclusive). Each threshold yields one
        matrix. A list passed here is left unmodified.
    :return: a list of ConfusionMatrix objects (if there are more than one to return), or a single
        ConfusionMatrix (if there is only one). Matrices for ``thresholds`` precede those for ``metrics``;
        within each group the given order is kept.
    :raises ValueError: if a threshold is not a number in [0, 1], or a metric is not one of the allowed names.
    """
    allowed_metrics = ("min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity",
                       "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy")

    if metrics is None and thresholds is None:
        metrics = ["f1"]

    # Work on copies: appending to the caller's own list would leak state between calls.
    if metrics is None:
        metrics_list = []
    elif isinstance(metrics, list):
        metrics_list = list(metrics)
    else:
        metrics_list = [metrics]

    if thresholds is None:
        thresholds_list = []
    elif isinstance(thresholds, list):
        thresholds_list = list(thresholds)
    else:
        thresholds_list = [thresholds]

    # Both bounds must hold; ``t >= 0 or t <= 1`` was true for every number and rejected nothing.
    if not all(is_numeric(t) and 0 <= t <= 1 for t in thresholds_list):
        raise ValueError("All thresholds must be numbers between 0 and 1 (inclusive).")
    if not all(m in allowed_metrics for m in metrics_list):
        # Build the message from the checked tuple so it always lists exactly what is accepted.
        raise ValueError("The only allowable metrics are " + ", ".join(allowed_metrics))

    # Each entry is (threshold, originating metric or None). Looking the metric up by threshold value
    # mislabels matrices whenever two thresholds coincide, so the origin is carried along instead.
    requests = [(t, None) for t in thresholds_list]
    requests.extend((self.find_threshold_by_max_metric(m), m) for m in metrics_list)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    cms = []
    for t, metric in requests:
        idx = self.find_idx_by_threshold(t)
        row = thresh2d.cell_values[idx]
        # Column 0 is read as the threshold; columns 11-14 as the tns, fns, fps and tps counts.
        actual_threshold = float(row[0])
        tns, fns, fps, tps = row[11], row[12], row[13], row[14]
        if metric is None:
            table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_threshold)
        else:
            table_header = "Confusion Matrix (Act/Pred) for max " + metric + " @ threshold = " + str(
                actual_threshold)
        # Matrix rows are [tns, fps] and [fns, tps]; (tns + fps) - fps and (tps + fns) - tps
        # simplify to tns and fns.
        cms.append(ConfusionMatrix(cm=[[tns, fps], [fns, tps]],
                                   domains=self._metric_json['domain'],
                                   table_header=table_header))

    if len(cms) == 1:
        return cms[0]
    return cms