Example #1
    def perf(devel_scores, test_scores, threshold_func):

        from bob.measure import farfrr

        devel_attack_scores = devel_scores[1][:, 0]
        devel_real_scores = devel_scores[0][:, 0]
        test_attack_scores = test_scores[1][:, 0]
        test_real_scores = test_scores[0][:, 0]

        devel_real = devel_real_scores.shape[0]
        devel_attack = devel_attack_scores.shape[0]
        test_real = test_real_scores.shape[0]
        test_attack = test_attack_scores.shape[0]

        thres = threshold_func(devel_attack_scores, devel_real_scores)
        devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores,
                                      thres)
        test_far, test_frr = farfrr(test_attack_scores, test_real_scores,
                                    thres)

        retval = {'threshold': '%.4f' % thres}
        d = make_dict('devel-', devel_far, devel_attack, devel_frr, devel_real)
        retval.update(d)
        d = make_dict('test-', test_far, test_attack, test_frr, test_real)
        retval.update(d)

        return retval, thres
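
The snippet calls a make_dict helper that is not shown. Below is a minimal sketch of what it plausibly does, inferred only from the call sites here; the keys and formatting are guesses, not the original implementation:

def make_dict(prefix, far, n_attack, frr, n_real):
    # Hypothetical reconstruction: packs the error rates and sample counts
    # into a flat dict whose keys carry the 'devel-' or 'test-' prefix
    return {
        prefix + 'far': '%.2f%% (%d/%d)' % (
            100 * far, int(round(far * n_attack)), n_attack),
        prefix + 'frr': '%.2f%% (%d/%d)' % (
            100 * frr, int(round(frr * n_real)), n_real),
    }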
Example #2
def perf_hter(test_scores, devel_scores, threshold_func):
  """Computes a performance table and returns the HTER for the test and development set, as well as a formatted text with the results and the value of the threshold obtained for the given threshold function
     Keyword parameters:
       test_scores - the scores of the samples in the test set
       devel_scores - the scores of the samples in the development set
       threshold function - the type of threshold
  """ 
   
  from bob.measure import farfrr

  devel_attack_scores = devel_scores[1][:,0]
  devel_real_scores = devel_scores[0][:,0]
  test_attack_scores = test_scores[1][:,0]
  test_real_scores = test_scores[0][:,0]

  devel_real = devel_real_scores.shape[0]
  devel_attack = devel_attack_scores.shape[0]
  test_real = test_real_scores.shape[0]
  test_attack = test_attack_scores.shape[0]

  thres = threshold_func(devel_attack_scores, devel_real_scores)
  devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores, thres)
  test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)
  devel_hter = 50 * (devel_far + devel_frr)
  test_hter = 50 * (test_far + test_frr)
  devel_text = " d: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (100*devel_far, 100*devel_frr, devel_hter)
  test_text = " t: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (100*test_far, 100*test_frr, test_hter)
  return (test_hter, devel_hter), (test_text, devel_text), thres
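
A minimal usage sketch, assuming bob.measure is installed and using synthetic (n, 1)-shaped score arrays to match the [:, 0] indexing above (real-access scores at index 0 of each tuple, attack scores at index 1):

import numpy
from bob.measure import eer_threshold

rng = numpy.random.default_rng(0)
devel = (rng.normal(1.0, 0.5, (100, 1)),    # real-access scores
         rng.normal(-1.0, 0.5, (100, 1)))   # attack scores
test = (rng.normal(1.0, 0.5, (80, 1)),
        rng.normal(-1.0, 0.5, (80, 1)))

(test_hter, devel_hter), (test_text, devel_text), thres = \
    perf_hter(test, devel, eer_threshold)
print(devel_text)
print(test_text)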
Example #3
    def perf(devel_scores, test_scores, threshold_func):

        from bob.measure import farfrr

        devel_attack_scores = devel_scores[1][:, 0]
        devel_real_scores = devel_scores[0][:, 0]
        test_attack_scores = test_scores[1][:, 0]
        test_real_scores = test_scores[0][:, 0]

        devel_real = devel_real_scores.shape[0]
        devel_attack = devel_attack_scores.shape[0]
        test_real = test_real_scores.shape[0]
        test_attack = test_attack_scores.shape[0]

        thres = threshold_func(devel_attack_scores, devel_real_scores)
        devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores,
                                      thres)
        test_far, test_frr = farfrr(test_attack_scores, test_real_scores,
                                    thres)

        retval = []
        retval.append(" threshold: %.4f" % thres)
        retval.append(
            pline("dev ", devel_far, devel_attack, devel_frr, devel_real))
        retval.append(pline("test", test_far, test_attack, test_frr,
                            test_real))

        return retval, thres
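
As with make_dict above, the pline helper is defined elsewhere in the enclosing module; a plausible reconstruction from the call sites (hypothetical, not the original):

def pline(label, far, n_attack, frr, n_real):
    # Hypothetical reconstruction: formats one line of the performance table
    return ("%s: FAR %.2f%% (%d/%d) | FRR %.2f%% (%d/%d) | HTER %.2f%%" % (
        label, 100 * far, int(round(far * n_attack)), n_attack,
        100 * frr, int(round(frr * n_real)), n_real,
        50 * (far + frr)))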
Example #4
def perf_hter(test_scores, devel_scores, threshold_func):
    """Computes a performance table and returns the HTER for the test and development set, as well as a formatted text with the results and the value of the threshold obtained for the given threshold function
     Keyword parameters:
       test_scores - the scores of the samples in the test set
       devel_scores - the scores of the samples in the development set
       threshold function - the type of threshold
  """

    from bob.measure import farfrr

    devel_attack_scores = devel_scores[1][:, 0]
    devel_real_scores = devel_scores[0][:, 0]
    test_attack_scores = test_scores[1][:, 0]
    test_real_scores = test_scores[0][:, 0]

    devel_real = devel_real_scores.shape[0]
    devel_attack = devel_attack_scores.shape[0]
    test_real = test_real_scores.shape[0]
    test_attack = test_attack_scores.shape[0]

    thres = threshold_func(devel_attack_scores, devel_real_scores)
    devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores,
                                  thres)
    test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)
    devel_hter = 50 * (devel_far + devel_frr)
    test_hter = 50 * (test_far + test_frr)
    devel_text = " d: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (
        100 * devel_far, 100 * devel_frr, devel_hter)
    test_text = " t: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (
        100 * test_far, 100 * test_frr, test_hter)
    return (test_hter, devel_hter), (test_text, devel_text), thres
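
The factor of 50 in both HTER computations simply folds the percent conversion into the average of the two rates:

# HTER = (FAR + FRR) / 2; in percent, 100 * (far + frr) / 2 == 50 * (far + frr)
far, frr = 0.10, 0.04
hter_percent = 50 * (far + frr)  # 7.0, i.e. 7.00%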
Example #5
  def perf(devel_scores, test_scores, threshold_func):
  
    from bob.measure import farfrr

    devel_attack_scores = devel_scores[1][:,0]
    devel_real_scores = devel_scores[0][:,0]
    test_attack_scores = test_scores[1][:,0]
    test_real_scores = test_scores[0][:,0]

    devel_real = devel_real_scores.shape[0]
    devel_attack = devel_attack_scores.shape[0]
    test_real = test_real_scores.shape[0]
    test_attack = test_attack_scores.shape[0]

    thres = threshold_func(devel_attack_scores, devel_real_scores)
    devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores, thres)
    test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)

    retval = {'threshold': '%.4f' % thres}
    d = make_dict('devel-', devel_far, devel_attack, devel_frr, devel_real)
    retval.update(d)
    d = make_dict('test-', test_far, test_attack, test_frr, test_real)
    retval.update(d)

    return retval, thres
Example #6
def perf_hter_thorough(test_scores, devel_scores, threshold_func):
    """Computes a performance table and returns the HTER for the test and development set, as well as a formatted text with the results and the value of the threshold obtained for the given threshold function
     Keyword parameters:
       test_scores - the scores of the samples in the test set (tuple)
       devel_scores - the scores of the samples in the development set (tuple)
       threshold function - the type of threshold
  """

    from bob.measure import farfrr

    devel_attack_scores = devel_scores[1]
    devel_real_scores = devel_scores[0]
    test_attack_scores = test_scores[1]
    test_real_scores = test_scores[0]

    devel_attack_scores = devel_attack_scores.reshape([
        len(devel_attack_scores)
    ])  # all scores should be arrays with shape (n,)
    devel_real_scores = devel_real_scores.reshape([len(devel_real_scores)])
    test_attack_scores = test_attack_scores.reshape([len(test_attack_scores)])
    test_real_scores = test_real_scores.reshape([len(test_real_scores)])

    thres = threshold_func(devel_attack_scores, devel_real_scores)
    devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores,
                                  thres)
    test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)
    return (devel_far, devel_frr), (test_far, test_frr)
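
The reshape calls above flatten (n, 1) column vectors into shape (n,); numpy's ravel method does the same thing more idiomatically (a small check, not part of the original code):

import numpy

col = numpy.arange(5.0).reshape((5, 1))
flat = col.reshape([len(col)])               # shape (5,), as above
assert numpy.array_equal(flat, col.ravel())  # equivalent, more idiomatic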
Example #7
def perf_hter_thorough(test_scores, devel_scores, threshold_func):
    """Computes a performance table and returns the HTER for the test and development set, as well as a formatted text with the results and the value of the threshold obtained for the given threshold function
     Keyword parameters:
       test_scores - the scores of the samples in the test set (tuple)
       devel_scores - the scores of the samples in the development set (tuple)
       threshold function - the type of threshold
  """

    from bob.measure import farfrr

    devel_attack_scores = devel_scores[1]
    devel_real_scores = devel_scores[0]
    test_attack_scores = test_scores[1]
    test_real_scores = test_scores[0]

    devel_attack_scores = devel_attack_scores.reshape(
        [len(devel_attack_scores)]
    )  # all scores should be arrays with shape (n,)
    devel_real_scores = devel_real_scores.reshape([len(devel_real_scores)])
    test_attack_scores = test_attack_scores.reshape([len(test_attack_scores)])
    test_real_scores = test_real_scores.reshape([len(test_real_scores)])

    thres = threshold_func(devel_attack_scores, devel_real_scores)
    devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores, thres)
    test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)
    return (devel_far, devel_frr), (test_far, test_frr)
Example #8
def test_basic_ratios():

  from . import farfrr, precision_recall, f_score

  # We test the basic functionality of FAR and FRR calculation. The first
  # example is separable, with a separation threshold of about 3.0.

  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  minimum = min(positives.min(), negatives.min())
  maximum = max(positives.max(), negatives.max())

  # If we take a threshold on the minimum, the FAR should be 1.0 and the FRR
  # should be 0.0. Precision should be 0.5, recall should be 1.0
  far, frr = farfrr(negatives, positives, minimum - 0.1)
  nose.tools.eq_(far, 1.0)
  nose.tools.eq_(frr, 0.0)
  prec, recall = precision_recall(negatives, positives, minimum - 0.1)
  nose.tools.eq_(prec, 0.5)
  nose.tools.eq_(recall, 1.0)

  # Similarly, if we take a threshold on the maximum, the FRR should be 1.0
  # while the FAR should be 0.0. Both precision and recall should be 0.0.
  far, frr = farfrr(negatives, positives, maximum + 0.1)
  nose.tools.eq_(far, 0.0)
  nose.tools.eq_(frr, 1.0)
  prec, recall = precision_recall(negatives, positives, maximum + 0.1)
  nose.tools.eq_(prec, 0.0)
  nose.tools.eq_(recall, 0.0)

  # If we choose the appropriate threshold, we should get 0.0 for both FAR
  # and FRR. Precision will be 1.0, recall will be 1.0
  far, frr = farfrr(negatives, positives, 3.0)
  nose.tools.eq_(far, 0.0)
  nose.tools.eq_(frr, 0.0)
  prec, recall = precision_recall(negatives, positives, 3.0)
  nose.tools.eq_(prec, 1.0)
  nose.tools.eq_(recall, 1.0)

  # Testing the values of F-score depending on different choices of the
  # threshold
  f_score_ = f_score(negatives, positives, minimum - 0.1)
  nose.tools.assert_almost_equal(f_score_, 0.66666667)
  f_score_ = f_score(negatives, positives, minimum - 0.1, 2)
  nose.tools.assert_almost_equal(f_score_, 0.83333333)

  f_score_ = f_score(negatives, positives, maximum + 0.1)
  nose.tools.eq_(f_score_, 0.0)
  f_score_ = f_score(negatives, positives, maximum + 0.1, 2)
  nose.tools.eq_(f_score_, 0.0)

  f_score_ = f_score(negatives, positives, 3.0)
  nose.tools.eq_(f_score_, 1.0)
  f_score_ = f_score(negatives, positives, 3.0, 2)
  nose.tools.eq_(f_score_, 1.0)
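
The expected F-score values in this test follow directly from the precision/recall assertions; a quick check of the arithmetic, using the general F-beta formula:

# F_beta = (1 + beta**2) * P * R / (beta**2 * P + R)
P, R = 0.5, 1.0                   # at the 'minimum - 0.1' threshold
f1 = 2 * P * R / (P + R)          # 0.6666..., matches 0.66666667 above
f2 = 5 * P * R / (4 * P + R)      # 0.8333..., matches 0.83333333 above
assert abs(f1 - 2.0 / 3.0) < 1e-12 and abs(f2 - 5.0 / 6.0) < 1e-12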
Example #9
def weighted_neg_error_rate_criteria(data,
                                     weight,
                                     thres,
                                     beta=0.5,
                                     criteria="eer"):
    """Given the single value for the weight parameter balancing between
    impostors and spoofing attacks and a threshold, calculates the error rates
    and their relationship depending on the criteria (difference in case of
    'eer', hter in case of 'min-hter' criteria)
    Keyword parameters:

      - data - the development data used to determine the threshold. List on 4
      numpy.arrays containing: negatives (licit), positives (licit),
      negatives (spoof), positives (spoof)
      - weight - the weight parameter balancing between impostors and spoofing
      attacks
      - thres - the given threshold
      - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks). Note that this
      parameter will be overridden and not considered if the selected criteria
      is 'min-hter'.
      - criteria - 'eer', 'wer' or 'min-hter' criteria for decision threshold
    """

    licit_neg = data[0]
    licit_pos = data[1]
    spoof_neg = data[2]
    spoof_pos = data[3]  # unpacking the data
    farfrr_licit = farfrr(licit_neg, licit_pos, thres)
    farfrr_spoof = farfrr(spoof_neg, spoof_pos, thres)

    frr = farfrr_licit[1]  # farfrr_spoof[1] should have the same value
    far_i = farfrr_licit[0]
    far_s = farfrr_spoof[0]

    far_w = (1 - weight) * far_i + weight * far_s

    if criteria == "eer":
        if beta == 0.5:
            return abs(far_w - frr)
        else:
            return abs((1 - beta) * frr - beta * far_w)

    elif criteria == "min-hter":
        return (far_w + frr) / 2

    else:
        return (1 - beta) * frr + beta * far_w
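
A small worked example of the weighted FAR above: the weight interpolates linearly between the zero-effort impostor FAR and the spoof FAR:

far_i, far_s = 0.02, 0.30     # impostor and spoof false accept rates
for weight in (0.0, 0.5, 1.0):
    far_w = (1 - weight) * far_i + weight * far_s
    print(weight, far_w)      # 0.02, 0.16, 0.30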
Example #10
    def _numbers(self, neg, pos, spoof, threshold, fta):
        """Computes each metric value"""
        # false match rate (FMR) and false non-match rate (FNMR)
        fmr, fnmr = farfrr(neg, pos, threshold)
        hter = (fmr + fnmr) / 2.0
        far = fmr * (1 - fta)
        frr = fta + fnmr * (1 - fta)

        ni = neg.shape[0]  # number of impostors
        fm = int(round(fmr * ni))  # number of false matches
        nc = pos.shape[0]  # number of clients
        fnm = int(round(fnmr * nc))  # number of false non-matches

        # precision and recall
        precision, recall = precision_recall(neg, pos, threshold)

        # f_score
        f1_score = f_score(neg, pos, threshold, 1)

        # AUC ROC
        auc = roc_auc_score(neg, pos)
        auc_log = roc_auc_score(neg, pos, log_scale=True)

        # IAPMR at threshold
        iapmr, _ = farfrr(spoof, [0.0], threshold)
        spoof_total = len(spoof)
        spoof_match = int(round(iapmr * spoof_total))

        return {
            "fta": fta,
            "fmr": fmr,
            "fnmr": fnmr,
            "hter": hter,
            "far": far,
            "frr": frr,
            "fm": fm,
            "ni": ni,
            "fnm": fnm,
            "nc": nc,
            "precision": precision,
            "recall": recall,
            "f1_score": f1_score,
            "auc": auc,
            "auc_log": auc_log,
            "iapmr": iapmr,
            "spoof_match": spoof_match,
            "spoof_total": spoof_total,
        }
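
The far/frr corrections above fold the failure-to-acquire rate into the match rates (acquisition failures can never be falsely accepted and always count as rejections); a numeric check of those two lines:

fta, fmr, fnmr = 0.02, 0.05, 0.10
far = fmr * (1 - fta)          # 0.049: FAR shrinks with FTA
frr = fta + fnmr * (1 - fta)   # 0.118: FRR absorbs the FTA mass
assert abs(far - 0.049) < 1e-12 and abs(frr - 0.118) < 1e-12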
Example #11
def test_obvious_thresholds():
  from . import far_threshold, frr_threshold, farfrr
  M = 10
  neg = numpy.arange(M, dtype=float)
  pos = numpy.arange(M, 2 * M, dtype=float)

  for far, frr in zip(numpy.arange(0, 2 * M + 1, dtype=float) / M / 2,
                      numpy.arange(0, 2 * M + 1, dtype=float) / M / 2):
    far, expected_far = round(far, 2), math.floor(far * 10) / 10
    frr, expected_frr = round(frr, 2), math.floor(frr * 10) / 10
    calculated_far_threshold = far_threshold(neg, pos, far)
    pred_far, _ = farfrr(neg, pos, calculated_far_threshold)

    calculated_frr_threshold = frr_threshold(neg, pos, frr)
    _, pred_frr = farfrr(neg, pos, calculated_frr_threshold)
    assert pred_far <= far, (pred_far, far, calculated_far_threshold)
    assert pred_far == expected_far, (pred_far, far, calculated_far_threshold)
    assert pred_frr <= frr, (pred_frr, frr, calculated_frr_threshold)
    assert pred_frr == expected_frr, (pred_frr, frr, calculated_frr_threshold)
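
The floor(far * 10) / 10 expectation follows from the test setup: with M = 10 distinct negative scores, the empirically achievable FAR values are exactly the multiples of 1/10, and far_threshold guarantees the realized FAR never exceeds the request, so each request is met from below:

import math
M = 10
requested = 0.15
achievable = math.floor(requested * M) / M  # 0.1, the largest multiple
                                            # of 1/M not above the request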
Example #12
def error_rates_at_weight(licit_neg,
                          licit_pos,
                          spoof_neg,
                          spoof_pos,
                          omega,
                          threshold,
                          beta=0.5):
    """Calculates several error rates: FRR, FAR (zero-effort impostors), SFAR,
        FAR_w, HTER_w for a given value of w. It returns the calculated threshold
        as a last argument

      Keyword arguments:

        - licit_neg - numpy.array of scores for the negatives (licit scenario)
        - licit_pos - numpy.array of scores for the positives (licit scenario)
        - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
        - spoof_pos - numpy.array of scores for the positives (spoof scenario)
        - threshold - the given threshold
        - omega - the omega parameter balancing between impostors and spoofing
        attacks
        - beta - the weight parameter balancing between real accesses and all the

    negative samples (impostors and spoofing attacks).
    """

    farfrr_licit = farfrr(
        licit_neg, licit_pos,
        threshold)  # FAR/FRR @ threshold (licit scenario)
    farfrr_spoof = farfrr(
        spoof_neg, spoof_pos,
        threshold)  # FAR/FRR @ threshold (spoof scenario)

    # we can take this value from farfrr_spoof as well, it doesn't matter
    frr = farfrr_licit[1]
    far = farfrr_licit[0]
    sfar = farfrr_spoof[0]

    far_w = weighted_err(far, sfar, omega)
    hter_w = (far_w + frr) / 2
    wer_wb = weighted_err(frr, far_w, beta)

    return (frr, far, sfar, far_w, wer_wb, hter_w, threshold)
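
The weighted_err helper is not shown here, but the inline formula in the weighted_neg_error_rate_criteria example above suggests it is a plain convex combination (an inferred sketch, not the original source):

def weighted_err(error_1, error_2, weight):
    # convex combination of two error rates; weight = 0 keeps only the
    # first rate, weight = 1 keeps only the second
    return (1 - weight) * error_1 + weight * error_2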
Example #13
def test_mindcf():
  """ Test outlier scores in negative set
  """
  from bob.measure import min_weighted_error_rate_threshold, farfrr
  cost = 0.99
  negatives = [-3, -2, -1, -0.5, 4]
  positives = [0.5, 3]
  th = min_weighted_error_rate_threshold(negatives, positives, cost, True)
  far, frr = farfrr(negatives, positives, th)
  mindcf = (cost * far + (1 - cost) * frr) * 100
  assert mindcf < 1.0 + 1e-8
Example #14
    def compute(self, idx, input_scores, input_names):
        """Implements plots"""
        dev_scores = clean_scores(input_scores[0])
        # this plot needs evaluation scores: eval_scores is used
        # unconditionally below
        if self._eval:
            eval_scores = clean_scores(input_scores[1])
        fmr_list = np.linspace(0, 1, 100)
        iapmr_list = []
        for i, fmr in enumerate(fmr_list):
            thr = far_threshold(dev_scores["licit_neg"],
                                dev_scores["licit_pos"], fmr)
            iapmr_list.append(farfrr(eval_scores["spoof"], [0.0], thr)[0])
            # re-calculate fmr since the threshold might give a different
            # result for fmr
            fmr_list[i], _ = farfrr(eval_scores["licit_neg"], [0.0], thr)
        label = (self._legends[idx]
                 if self._legends is not None else f"system {idx+1}")
        logger.info(f"Plot FmrIapmr using: {input_names[1]}")
        if self._semilogx:
            mpl.semilogx(fmr_list, iapmr_list, label=label)
        else:
            mpl.plot(fmr_list, iapmr_list, label=label)
Example #15
    def perf(devel_scores, test_scores, threshold_func):

        from bob.measure import farfrr

        devel_attack_scores = devel_scores[1][:, 0]
        devel_real_scores = devel_scores[0][:, 0]
        test_attack_scores = test_scores[1][:, 0]
        test_real_scores = test_scores[0][:, 0]

        devel_real = devel_real_scores.shape[0]
        devel_attack = devel_attack_scores.shape[0]
        test_real = test_real_scores.shape[0]
        test_attack = test_attack_scores.shape[0]

        thres = threshold_func(devel_attack_scores, devel_real_scores)
        devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores, thres)
        test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)

        retval = []
        retval.append(" threshold: %.4f" % thres)
        retval.append(pline("dev ", devel_far, devel_attack, devel_frr, devel_real))
        retval.append(pline("test", test_far, test_attack, test_frr, test_real))

        return retval, thres
Example #16
    def _lines(self, threshold, label, neg, pos, idx, **kwargs):
        spoof = neg[1]
        neg = neg[0]
        pos = pos[0]
        # plot the EER threshold vertical line
        super(HistVuln, self)._lines(threshold, label, neg, pos, idx, **kwargs)

        if "iapmr_line" not in self._ctx.meta or self._ctx.meta["iapmr_line"]:
            # Plot iapmr_line (accepted PA vs threshold)
            iapmr, _ = farfrr(spoof, [0.0], threshold)
            ax2 = mpl.twinx()
            # we never want grid lines on axis 2
            ax2.grid(False)
            real_data = self._ctx.meta.get("real_data", True)
            _iapmr_plot(spoof, threshold, iapmr, real_data=real_data)
            n = idx % self._step_print
            col = n % self._ncols
            rest_print = (self.n_systems -
                          int(idx / self._step_print) * self._step_print)
            if col == self._ncols - 1 or n == rest_print - 1:
                ax2.set_ylabel("IAPMR (%)", color="C3")
            ax2.tick_params(axis="y", colors="C3")
            ax2.yaxis.label.set_color("C3")
            ax2.spines["right"].set_color("C3")
Example #17
    def _get_farfrr(self, x, y, thres):
        # returns the raw (FAR, FRR) pair and its ROC-style counterpart
        # (FAR, 1 - FRR), i.e. false positive rate vs. true positive rate
        points = farfrr(x, y, thres)
        points2 = (points[0], 1 - points[1])
        return points, points2
Example #18
def test_thresholding():

  from . import eer_threshold, far_threshold, frr_threshold, farfrr, \
      correctly_classified_positives, correctly_classified_negatives, \
      min_hter_threshold

  def count(array, value=True):
    """Counts occurrences of a certain value in an array"""
    return list(array == value).count(True)

  # This example will demonstrate and check the use of eer_threshold() to
  # calculate the threshold that minimizes the EER.

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  threshold = eer_threshold(negatives, positives)

  sorted_positives = numpy.sort(positives)
  sorted_negatives = numpy.sort(negatives)

  # Of course, we have to make sure the threshold sets the EER correctly:
  ccp = count(correctly_classified_positives(positives, threshold))
  ccn = count(correctly_classified_negatives(negatives, threshold))
  assert (ccp - ccn) <= 1

  for t in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1):
    # Let's also test the far_threshold and the frr_threshold functions
    threshold_far = far_threshold(sorted_negatives, [], t, is_sorted=True)
    threshold_frr = frr_threshold([], sorted_positives, t, is_sorted=True)
    # Check that the obtained FAR and FRR values are not larger than the
    # requested ones
    far = farfrr(negatives, positives, threshold_far)[0]
    frr = farfrr(negatives, positives, threshold_frr)[1]
    if not math.isnan(threshold_far):
      assert far <= t, (far, t)
      assert t - far <= 0.1
    if not math.isnan(threshold_frr):
      assert frr <= t, (frr, t)
      # test that the values are at least somewhere in the range
      assert t - frr <= 0.1

  # If the set is separable, the calculation of the threshold is a little bit
  # trickier, as you have no points in the middle of the range to compare
  # things to. This is where the currently used recursive algorithm seems to
  # do better. Let's verify
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
  threshold = eer_threshold(negatives, positives)
  # the result here is 3.2 (which is what we expect ;-)
  assert threshold == 3.2

  # Of course, we have to make sure the threshold sets the EER correctly:
  ccp = count(correctly_classified_positives(positives, threshold))
  ccn = count(correctly_classified_negatives(negatives, threshold))
  nose.tools.eq_(ccp, ccn)

  # The second option for the calculation of the threshold is to use the
  # minimum HTER.
  threshold2 = min_hter_threshold(negatives, positives)
  assert threshold2 == 3.2
  nose.tools.eq_(threshold, threshold2)  # in this particular case

  # Of course, we have to make sure the threshold sets the EER correctly:
  ccp = count(correctly_classified_positives(positives, threshold2))
  ccn = count(correctly_classified_negatives(negatives, threshold2))
  nose.tools.eq_(ccp, ccn)
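
A side note on the far_threshold / frr_threshold calls above: each rate depends on only one side of the scores, so the other side may be passed as an empty list, and is_sorted=True skips the internal sort. A small standalone sketch, assuming bob.measure is installed:

import numpy
from bob.measure import far_threshold, farfrr

neg = numpy.sort(numpy.random.normal(0.0, 1.0, 1000))
thr = far_threshold(neg, [], 0.01, is_sorted=True)  # positives unused
far, _ = farfrr(neg, numpy.array([10.0]), thr)      # dummy positive
assert far <= 0.01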
Example #19
    def _get_farfrr(self, x, y, thres):
        # maps the (FAR, FRR) pair through ppndf (the normal deviate scale
        # used by DET plots)
        points = farfrr(x, y, thres)
        return points, [ppndf(i) for i in points]
Example #20
    def compute(self, idx, input_scores, input_names):
        """Implements plots"""
        dev_scores = clean_scores(input_scores[0])
        if self._eval:
            eval_scores = clean_scores(input_scores[1])
        else:
            eval_scores = {"licit_neg": [], "licit_pos": [], "spoof": []}

        mpl.figure(1)
        if self._eval:
            logger.info(f"dev curve using {input_names[0]}")
            self._plot(
                dev_scores["licit_neg"],
                dev_scores["licit_pos"],
                dev_scores["spoof"],
                npoints=self._points,
                tpr=self._tpr,
                min_far=self._min_dig,
                color=self._colors[idx],
                linestyle=self._linestyles[idx],
                label=self._label("dev", idx),
                alpha=self._alpha,
            )
            if not self._fnmrs_at:
                logger.info("Plotting fnmr line at dev eer threshold for dev")
                dev_threshold = get_thres(
                    criter="eer",
                    neg=dev_scores["licit_neg"],
                    pos=dev_scores["licit_pos"],
                )
                _, fnmr_at_dev_threshold = farfrr([0.0],
                                                  dev_scores["licit_pos"],
                                                  dev_threshold)
            fnmrs_dev = self._fnmrs_at or [fnmr_at_dev_threshold]
            self._draw_fnmrs(idx, dev_scores, fnmrs_dev)

            if self._split:
                mpl.figure(2)

            # Add the eval plot
            linestyle = "--" if not self._split else self._linestyles[idx]
            logger.info(f"eval curve using {input_names[1]}")
            self._plot(
                eval_scores["licit_neg"],
                eval_scores["licit_pos"],
                eval_scores["spoof"],
                linestyle=linestyle,
                npoints=self._points,
                tpr=self._tpr,
                min_far=self._min_dig,
                color=self._colors[idx],
                label=self._label("eval", idx),
                alpha=self._alpha,
            )
            if not self._fnmrs_at:
                logger.info("printing fnmr at dev eer threshold for eval")
                _, fnmr_at_dev_threshold = farfrr([0.0],
                                                  eval_scores["licit_pos"],
                                                  dev_threshold)
            fnmrs_dev = self._fnmrs_at or [fnmr_at_dev_threshold]
            self._draw_fnmrs(idx, eval_scores, fnmrs_dev, True)

        # Only dev scores available
        else:
            logger.info(f"dev curve using {input_names[0]}")
            self._plot(
                dev_scores["licit_neg"],
                dev_scores["licit_pos"],
                dev_scores["spoof"],
                npoints=self._points,
                tpr=self._tpr,
                min_far=self._min_dig,
                color=self._colors[idx],
                linestyle=self._linestyles[idx],
                label=self._label("dev", idx),
                alpha=self._alpha,
            )
            if not self._fnmrs_at:
                logger.info("Plotting fnmr line at dev eer threshold for dev")
                dev_threshold = get_thres(
                    criter="eer",
                    neg=dev_scores["licit_neg"],
                    pos=dev_scores["licit_pos"],
                )
                _, fnmr_at_dev_threshold = farfrr([0.0],
                                                  dev_scores["licit_pos"],
                                                  dev_threshold)
            fnmrs_dev = self._fnmrs_at or [fnmr_at_dev_threshold]
            self._draw_fnmrs(idx, dev_scores, fnmrs_dev)
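
Several snippets above pass a one-element dummy list for the side of the scores they do not need: farfrr(spoof, [0.0], threshold) reads only the FAR, and farfrr([0.0], positives, threshold) reads only the FNMR. A minimal illustration with synthetic scores, keeping only the relevant half of the return value:

import numpy
from bob.measure import farfrr

pos = numpy.array([0.2, 0.6, 0.9])
# only the FNMR is wanted: a dummy negative score stands in for the
# negatives, and the returned FAR is discarded
_, fnmr = farfrr(numpy.array([0.0]), pos, 0.5)
print(fnmr)  # 1/3: one of three positives falls below the threshold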