Exemplo n.º 1
0
def compute_metrics(cols, y_true, y_score, num_tasks, cal_fact_aucpr):
    """Compute classification metrics separately for every task.

    Args:
        cols: task label of each sample; used as the grouping key.
            Presumably integer task ids in ``[0, num_tasks)`` -- the result
            is reindexed on that range (confirm against callers).
        y_true: ground-truth value of each sample, aligned with ``cols``.
        y_score: predicted score of each sample, aligned with ``cols``.
        num_tasks: number of rows of the returned frame.
        cal_fact_aucpr: when it has ``__len__`` it is indexed with the task
            label and the result is forwarded to ``all_metrics`` as
            ``cal_fact_aucpr_task``; any other value is ignored and ``1.0``
            is forwarded instead.

    Returns:
        A DataFrame indexed by ``np.arange(num_tasks)``. Tasks without any
        sample get NaN rows; with no samples at all, every metric is NaN.
    """
    if len(cols) < 1:
        # No samples: return an all-NaN table with the expected columns.
        metric_names = (
            "roc_auc_score",
            "auc_pr",
            "avg_prec_score",
            "f1_max",
            "p_f1_max",
            "kappa",
            "kappa_max",
            "p_kappa_max",
            "bceloss",
        )
        return pd.DataFrame(dict.fromkeys(metric_names, np.nan),
                            index=np.arange(num_tasks))

    df = pd.DataFrame({"task": cols, "y_true": y_true, "y_score": y_score})
    per_task_factor = hasattr(cal_fact_aucpr, "__len__")

    def task_metrics(g):
        # ``g.name`` is the group key (the task label). Reading it instead of
        # ``g['task']`` keeps this working on pandas versions that no longer
        # pass the grouping column to the applied function.
        factor = cal_fact_aucpr[g.name] if per_task_factor else 1.0
        return all_metrics(y_true=g.y_true.values,
                           y_score=g.y_score.values,
                           cal_fact_aucpr_task=factor)

    metrics = df.groupby("task", sort=True).apply(task_metrics)
    # Drop the innermost index level so only the task label remains
    # (presumably ``all_metrics`` returns a one-row frame -- confirm).
    metrics.reset_index(level=-1, drop=True, inplace=True)
    return metrics.reindex(np.arange(num_tasks))
Exemplo n.º 2
0
def compute_metrics(cols, y_true, y_score):
    """Compute metrics separately for every task label found in ``cols``.

    ``cols``, ``y_true`` and ``y_score`` are aligned per-sample sequences.
    Samples are grouped by their task label (groups sorted by label) and each
    group's values are handed to ``all_metrics`` (defined elsewhere).

    Returns the combined per-task result with its innermost index level
    dropped.
    """
    def score_task(group):
        return all_metrics(y_true=group["y_true"].values,
                           y_score=group["y_score"].values)

    samples = pd.DataFrame({"task": cols, "y_true": y_true, "y_score": y_score})
    per_task = samples.groupby("task", sort=True).apply(score_task)
    # Keep only the task label in the index.
    return per_task.reset_index(level=-1, drop=True)
Exemplo n.º 3
0
def compute_metrics(cols, y_true, y_score, num_tasks):
    """Compute classification metrics separately for every task.

    ``cols``, ``y_true`` and ``y_score`` are aligned per-sample sequences;
    ``cols`` holds the task label used for grouping. Each group's values are
    handed to ``all_metrics`` (defined elsewhere).

    Returns a DataFrame indexed by ``np.arange(num_tasks)``; tasks without
    samples get NaN rows, and with no samples at all every metric is NaN.
    """
    task_index = np.arange(num_tasks)

    if len(cols) == 0:
        # No samples: return an all-NaN table with the expected columns.
        metric_names = (
            "roc_auc_score",
            "auc_pr",
            "avg_prec_score",
            "max_f1_score",
            "kappa",
        )
        return pd.DataFrame(dict.fromkeys(metric_names, np.nan),
                            index=task_index)

    def score_task(group):
        return all_metrics(y_true=group["y_true"].values,
                           y_score=group["y_score"].values)

    samples = pd.DataFrame({"task": cols, "y_true": y_true, "y_score": y_score})
    per_task = samples.groupby("task", sort=True).apply(score_task)
    # Keep only the task label in the index before aligning to all tasks.
    per_task = per_task.reset_index(level=-1, drop=True)
    return per_task.reindex(task_index)
Exemplo n.º 4
0
def compute_metrics_regr(cols, y_true, y_score, num_tasks, y_censor=None):
    """Compute regression metrics separately for every task.

    ``cols``, ``y_true``, ``y_score`` and (when given) ``y_censor`` are
    aligned per-sample sequences; ``cols`` holds the task label used for
    grouping. Each group's values are handed to ``all_metrics_regr`` (defined
    elsewhere); ``y_censor=None`` is forwarded as ``None``.

    Returns a DataFrame indexed by ``np.arange(num_tasks)``; tasks without
    samples get NaN rows, and with no samples at all every metric is NaN.
    """
    task_index = np.arange(num_tasks)

    if len(cols) == 0:
        # No samples: return an all-NaN table with the expected columns.
        metric_names = ("rmse", "rmse_uncen", "rsquared", "corrcoef")
        return pd.DataFrame(dict.fromkeys(metric_names, np.nan),
                            index=task_index)

    has_censor = y_censor is not None

    def score_task(group):
        censor = group["y_censor"].values if has_censor else None
        return all_metrics_regr(y_true=group["y_true"].values,
                                y_score=group["y_score"].values,
                                y_censor=censor)

    samples = pd.DataFrame({
        "task": cols,
        "y_true": y_true,
        "y_score": y_score,
        "y_censor": y_censor,
    })
    per_task = samples.groupby("task", sort=True).apply(score_task)
    # Keep only the task label in the index before aligning to all tasks.
    per_task = per_task.reset_index(level=-1, drop=True)
    return per_task.reindex(task_index)
def print_metrics_to_latex(metrics, filename):
    r"""Write ``metrics`` to ``filename`` as a LaTeX table.

    The formatter of each column is chosen from the second element of its
    label (presumably the columns are a two-level MultiIndex -- confirm):

    * ``'PR'``  -> ``\bscellA[<value, 3 decimals>]{<value * 100>}``
    * ``'ROC'`` -> ``\bscellB[<value, 3 decimals>]{<value * 100>}``
    * ``'ACC'`` -> plain number with 4 decimals
    * anything else -> the default float format (3 decimals)

    The index is turned into regular columns before writing, and LaTeX
    escaping is disabled so the macros are emitted verbatim.
    """
    table = metrics.copy()

    def bscell(macro_suffix):
        # Build a formatter that wraps a number in the \bscell<suffix> macro.
        def render(number):
            return '\\bscell%s[%.3f]{%3.0f}' % (macro_suffix, number,
                                                number * 100)
        return render

    by_kind = {
        'PR': bscell('A'),
        'ROC': bscell('B'),
        'ACC': lambda number: '%.4f' % (number, ),
    }

    # Labels whose second element is not a known kind map to None, i.e. they
    # fall back to the default float format passed below.
    formatters = {
        label: by_kind.get(label[1]) for label in table.columns.values
    }

    # workaround because the index is not properly formatted in Latex
    table = table.reset_index()

    table.to_latex(filename,
                   float_format=lambda number: '%.3f' % (number, ),
                   formatters=formatters,
                   escape=False,
                   index=False)