import numpy as np
import pandas as pd


def compute_metrics(cols, y_true, y_score, num_tasks, cal_fact_aucpr):
    if len(cols) < 1:
        # no measurements at all: return an all-NaN row for every task
        return pd.DataFrame({
            "roc_auc_score": np.nan,
            "auc_pr": np.nan,
            "avg_prec_score": np.nan,
            "f1_max": np.nan,
            "p_f1_max": np.nan,
            "kappa": np.nan,
            "kappa_max": np.nan,
            "p_kappa_max": np.nan,
            "bceloss": np.nan,
        }, index=np.arange(num_tasks))
    df = pd.DataFrame({"task": cols, "y_true": y_true, "y_score": y_score})
    if hasattr(cal_fact_aucpr, "__len__"):
        # per-task calibration factors: look up the factor of each group's task
        metrics = df.groupby("task", sort=True).apply(lambda g: all_metrics(
            y_true=g.y_true.values,
            y_score=g.y_score.values,
            cal_fact_aucpr_task=cal_fact_aucpr[g['task'].values[0]]))
    else:
        # scalar (or missing) calibration factor: use 1.0 for every task
        metrics = df.groupby("task", sort=True).apply(lambda g: all_metrics(
            y_true=g.y_true.values,
            y_score=g.y_score.values,
            cal_fact_aucpr_task=1.0))
    metrics.reset_index(level=-1, drop=True, inplace=True)
    return metrics.reindex(np.arange(num_tasks))
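
# Hedged usage note (not part of the original module): cal_fact_aucpr may be a
# scalar or a per-task indexable container. Because of the
# hasattr(..., "__len__") check above, a plain scalar always falls back to a
# factor of 1.0, whereas an array-like keyed by task id supplies one
# calibration factor per task, e.g.
#
#     compute_metrics(cols, y_true, y_score, num_tasks, cal_fact_aucpr=1.0)
#     compute_metrics(cols, y_true, y_score, num_tasks,
#                     cal_fact_aucpr=np.ones(num_tasks))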

def compute_metrics(cols, y_true, y_score):
    df = pd.DataFrame({"task": cols, "y_true": y_true, "y_score": y_score})
    metrics = df.groupby("task", sort=True).apply(lambda g: all_metrics(
        y_true=g.y_true.values, y_score=g.y_score.values))
    metrics.reset_index(level=-1, drop=True, inplace=True)
    return metrics

def compute_metrics(cols, y_true, y_score, num_tasks):
    if len(cols) < 1:
        return pd.DataFrame({
            "roc_auc_score": np.nan,
            "auc_pr": np.nan,
            "avg_prec_score": np.nan,
            "max_f1_score": np.nan,
            "kappa": np.nan,
        }, index=np.arange(num_tasks))
    df = pd.DataFrame({"task": cols, "y_true": y_true, "y_score": y_score})
    metrics = df.groupby("task", sort=True).apply(lambda g: all_metrics(
        y_true=g.y_true.values, y_score=g.y_score.values))
    metrics.reset_index(level=-1, drop=True, inplace=True)
    return metrics.reindex(np.arange(num_tasks))
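
def _example_per_task_metrics():
    """Hedged, self-contained sketch (not part of the original module) of the
    group-by-task pattern used by compute_metrics above: predictions arrive as
    flat COO-style triplets (task id, true label, score), are scored per task,
    and the result is reindexed to num_tasks so tasks without any measurements
    come back as all-NaN rows. scikit-learn's roc_auc_score stands in here for
    the all_metrics helper, which is assumed to be defined elsewhere."""
    from sklearn.metrics import roc_auc_score

    num_tasks = 3
    cols = np.array([0, 0, 0, 2, 2, 2, 2])          # task index per measurement
    y_true = np.array([1, 0, 1, 0, 1, 1, 0])
    y_score = np.array([0.9, 0.2, 0.7, 0.4, 0.8, 0.6, 0.1])

    df = pd.DataFrame({"task": cols, "y_true": y_true, "y_score": y_score})
    metrics = df.groupby("task", sort=True).apply(
        lambda g: pd.Series({"roc_auc_score": roc_auc_score(g.y_true, g.y_score)}))
    # task 1 has no measurements, so its row is NaN after reindexing
    return metrics.reindex(np.arange(num_tasks))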

def compute_metrics_regr(cols, y_true, y_score, num_tasks, y_censor=None):
    """Returns metrics for regression tasks."""
    if len(cols) < 1:
        return pd.DataFrame({
            "rmse": np.nan,
            "rmse_uncen": np.nan,
            "rsquared": np.nan,
            "corrcoef": np.nan,
        }, index=np.arange(num_tasks))
    df = pd.DataFrame({
        "task": cols,
        "y_true": y_true,
        "y_score": y_score,
        "y_censor": y_censor,
    })
    metrics = df.groupby("task", sort=True).apply(lambda g: all_metrics_regr(
        y_true=g.y_true.values,
        y_score=g.y_score.values,
        y_censor=g.y_censor.values if y_censor is not None else None))
    metrics.reset_index(level=-1, drop=True, inplace=True)
    return metrics.reindex(np.arange(num_tasks))
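
def _example_regression_metrics():
    """Hedged usage sketch (not part of the original module) for
    compute_metrics_regr. It assumes all_metrics_regr is available in this
    module; inputs follow the same flat COO-style layout as the classification
    case, with an optional y_censor array aligned element-wise with y_true."""
    num_tasks = 2
    cols = np.array([0, 0, 1, 1, 1])                 # task index per measurement
    y_true = np.array([5.1, 4.8, 7.2, 6.9, 7.5])
    y_score = np.array([5.0, 5.2, 7.0, 7.1, 7.3])
    return compute_metrics_regr(cols, y_true, y_score, num_tasks=num_tasks)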

def print_metrics_to_latex(metrics, filename):
    r"""Prints the metrics table to LaTeX, formatting cells as \bscellA{}."""
    metrics = metrics.copy()

    def cell_format(value, char):
        return '\\bscell%s[%.3f]{%3.0f}' % (char, value, value * 100)

    def float_format_short(value):
        return '%.3f' % (value,)

    def float_format_long(value):
        return '%.4f' % (value,)

    def float_formatA(value):
        return cell_format(value, 'A')

    def float_formatB(value):
        return cell_format(value, 'B')

    # pick a formatter per column based on the second level of the column name:
    # 'PR' columns use \bscellA, 'ROC' columns use \bscellB, 'ACC' columns use
    # the long float format; everything else falls back to the default
    formatters = {}
    for value in metrics.columns.values:
        if value[1] == 'PR':
            formatters[value] = float_formatA
        elif value[1] == 'ROC':
            formatters[value] = float_formatB
        elif value[1] == 'ACC':
            formatters[value] = float_format_long
        else:
            formatters[value] = None

    # workaround: move the index into a column, because the index itself is not
    # properly formatted in the LaTeX output
    metrics = metrics.reset_index()
    metrics.to_latex(filename, float_format=float_format_short,
                     formatters=formatters, escape=False, index=False)
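
def _example_print_metrics_to_latex():
    """Hedged, self-contained sketch (not part of the original module) showing
    the column layout print_metrics_to_latex expects: two-level columns whose
    second level names the metric ('ROC', 'PR', 'ACC', ...), since the
    formatters above are selected on value[1]. The column labels, row labels,
    and output path are all hypothetical."""
    columns = pd.MultiIndex.from_product([["modelA", "modelB"],
                                          ["ROC", "PR", "ACC"]])
    demo = pd.DataFrame(np.random.rand(3, len(columns)),
                        index=["assay1", "assay2", "assay3"],
                        columns=columns)
    print_metrics_to_latex(demo, "demo_metrics.tex")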