Python ClassificationElementFactory.ClassificationElementFactory示例，smqtk.representation.ClassificationElementFactory.ClassificationElementFactory Python示例

示例#1

0

显示文件

    def test_classify_elements_all_preexisting_overwrite(self):
        """ Test generating classification elements where all elements
        generated by the factory claim to already have classifications but
        overwrite is True this time."""
        d_elems = [
            DescriptorMemoryElement('', i).set_vector(v)
            for i, v in enumerate([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        ]

        # Mock a factory to produce elements whose ``has_classifications``
        # method returns False.
        m_ce_type = mock.MagicMock(name="MockedClassificationElementType")
        c_factory = ClassificationElementFactory(m_ce_type, {})
        # Mocking that elements have no classifications set
        m_ce_inst = m_ce_type.from_config()
        m_ce_inst.has_classifications.return_value = True

        list(
            self.inst.classify_elements(d_elems,
                                        factory=c_factory,
                                        overwrite=True))
        # Method not called becuase of overwrite short-circuit
        assert m_ce_inst.has_classifications.call_count == 0
        assert m_ce_inst.set_classification.call_count == 3
        # Check that expected classification returns from dummy generator were
        # set to factory-created elements.
        m_ce_inst.set_classification.assert_any_call({'test': 1})
        m_ce_inst.set_classification.assert_any_call({'test': 4})
        m_ce_inst.set_classification.assert_any_call({'test': 7})

        # Dummy classifier iterator completed to the end.
        self.inst._post_iterator_check.assert_called_once()

示例#2

0

显示文件

    def test_classify_elements_all_preexisting(self):
        """ Test generating classification elements where all elements
        generated by the factory claim to already have classifications and
        overwrite is False."""
        d_elems = [
            DescriptorMemoryElement('', i).set_vector(v)
            for i, v in enumerate([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        ]

        # Mock a factory to produce elements whose ``has_classifications``
        # method returns False.
        m_ce_type = mock.MagicMock(name="MockedClassificationElementType")
        c_factory = ClassificationElementFactory(m_ce_type, {})
        # Mocking that elements have no classifications set
        m_ce_inst = m_ce_type.from_config()
        m_ce_inst.has_classifications.return_value = True

        list(
            self.inst.classify_elements(d_elems,
                                        factory=c_factory,
                                        overwrite=False))
        assert m_ce_inst.has_classifications.call_count == 3
        m_ce_inst.set_classification.assert_not_called()

        # Dummy classifier iterator completed to the end.
        self.inst._post_iterator_check.assert_called_once()

示例#3

0

显示文件

    def test_classify_elements_none_preexisting(self):
        """ Test generating classification elements where none generated by the
        factory have existing vectors. i.e. all descriptor elements passed to
        underlying classification method."""
        d_elems = [
            DescriptorMemoryElement('', i).set_vector(v)
            for i, v in enumerate([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        ]

        # Mock a factory to produce elements whose ``has_classifications``
        # method returns False.
        m_ce_type = mock.MagicMock(name="MockedClassificationElementType")
        c_factory = ClassificationElementFactory(m_ce_type, {})
        # Mocking that elements have no classifications set
        m_ce_inst = m_ce_type.from_config()
        m_ce_inst.has_classifications.return_value = False

        list(
            self.inst.classify_elements(d_elems,
                                        factory=c_factory,
                                        overwrite=False))
        assert m_ce_inst.has_classifications.call_count == 3
        assert m_ce_inst.set_classification.call_count == 3
        # Check that expected classification returns from dummy generator were
        # set to factory-created elements.
        m_ce_inst.set_classification.assert_any_call({'test': 1})
        m_ce_inst.set_classification.assert_any_call({'test': 4})
        m_ce_inst.set_classification.assert_any_call({'test': 7})

        # Dummy classifier iterator completed to the end.
        self.inst._post_iterator_check.assert_called_once()

示例#4

0

显示文件

文件： score_phone_numbers.py 项目： sanyarud/SMQTK

    plt.ylabel("Precision")
    plt.legend(loc='best', fancybox=True, framealpha=0.5)
    plt.savefig(PLOT_PR_OUTPUT)

else:
    # Using the final trained classifier
    with open(CLASSIFIER_TRAINING_CONFIG_JSON) as f:
        classifier_config = json.load(f)

    log.info("Loading plugins")
    descriptor_index = MemoryDescriptorIndex(
        file_cache=DESCRIPTOR_INDEX_FILE_CACHE)
    #: :type: smqtk.algorithms.Classifier
    classifier = from_plugin_config(classifier_config['plugins']['classifier'],
                                    get_classifier_impls())
    c_factory = ClassificationElementFactory(MemoryClassificationElement, {})

    #: :type: dict[str, list[str]]
    phone2shas = json.load(open(PHONE_SHA1_JSON))
    #: :type: dict[str, float]
    phone2score = {}

    log.info("Classifying phone imagery descriptors")
    i = 0
    descriptor_index_shas = set(descriptor_index.iterkeys())
    for p in phone2shas:
        log.info('%s (%d / %d)', p, i + 1, len(phone2shas))
        # Not all source "images" have descriptors since some URLs returned
        # non-image files. Intersect phone sha's with what was actually
        # computed. Warn if this reduces descriptors for classification to zero.
        indexed_shas = set(phone2shas[p]) & descriptor_index_shas

示例#5

0

显示文件

        def test_simple_classification(self):
            """
            simple LibSvmClassifier test - 2-class

            Test libSVM classification functionality using random constructed
            data, training the y=0.5 split
            """
            DIM = 2
            N = 1000
            POS_LABEL = 'positive'
            NEG_LABEL = 'negative'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(
                MemoryClassificationElement, {})

            def make_element(argtup):
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': '',  # quite mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos, N))
            d_neg = p.map(make_element, enumerate(x_neg, N + N // 2))

            d_pos_sync = {}  # for comparing to async
            for d in d_pos:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), POS_LABEL,
                    "Found False positive: %s :: %s" %
                    (d.vector(), c.get_classification()))
                d_pos_sync[d] = c

            d_neg_sync = {}
            for d in d_neg:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), NEG_LABEL,
                    "Found False negative: %s :: %s" %
                    (d.vector(), c.get_classification()))
                d_neg_sync[d] = c

            # test that async classify produces the same results
            # -- d_pos
            m_pos = classifier.classify_async(d_pos, c_factory)
            ntools.assert_equal(
                m_pos, d_pos_sync,
                "Async computation of pos set did not yield "
                "the same results as synchronous "
                "classification.")
            # -- d_neg
            m_neg = classifier.classify_async(d_neg, c_factory)
            ntools.assert_equal(
                m_neg, d_neg_sync,
                "Async computation of neg set did not yield "
                "the same results as synchronous "
                "classification.")
            # -- combined -- threaded
            combined_truth = dict(d_pos_sync.items())
            combined_truth.update(d_neg_sync)
            m_combined = classifier.classify_async(
                d_pos + d_neg,
                c_factory,
                use_multiprocessing=False,
            )
            ntools.assert_equal(
                m_combined, combined_truth,
                "Async computation of all test descriptors "
                "did not yield the same results as "
                "synchronous classification.")
            # -- combined -- multiprocess
            m_combined = classifier.classify_async(
                d_pos + d_neg,
                c_factory,
                use_multiprocessing=True,
            )
            ntools.assert_equal(
                m_combined, combined_truth,
                "Async computation of all test descriptors "
                "(mixed order) did not yield the same results "
                "as synchronous classification.")

            # Closing resources
            p.close()
            p.join()

示例#6

0

显示文件

        def test_no_save_model_pickle(self):
            # Test model preservation across pickling even without model cache
            # file paths set.
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': '',  # quite mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            ntools.assert_true(classifier.svm_model is None)
            # Empty model should not trigger __LOCAL__ content in pickle
            ntools.assert_not_in('__LOCAL__', classifier.__getstate__())
            _ = cPickle.loads(cPickle.dumps(classifier))

            # train arbitrary model (same as ``test_simple_classification``)
            DIM = 2
            N = 1000
            POS_LABEL = 'positive'
            NEG_LABEL = 'negative'
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(
                MemoryClassificationElement, {})

            def make_element(argtup):
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]
            p = multiprocessing.pool.ThreadPool()
            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))
            p.close()
            p.join()

            # Training
            classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

            # Test original classifier
            t_v = numpy.random.rand(DIM)
            t = d_factory.new_descriptor('query', 0)
            t.set_vector(t_v)
            c_expected = classifier.classify(t, c_factory)

            # Should see __LOCAL__ content in pickle state now
            p_state = classifier.__getstate__()
            ntools.assert_in('__LOCAL__', p_state)
            ntools.assert_in('__LOCAL_LABELS__', p_state)
            ntools.assert_in('__LOCAL_MODEL__', p_state)
            ntools.assert_true(len(p_state['__LOCAL_LABELS__']) > 0)
            ntools.assert_true(len(p_state['__LOCAL_MODEL__']) > 0)

            # Restored classifier should classify the same test descriptor the
            # same
            #: :type: LibSvmClassifier
            classifier2 = cPickle.loads(cPickle.dumps(classifier))
            c_post_pickle = classifier2.classify(t, c_factory)
            # There may be floating point error, so extract actual confidence
            # values and check post round
            c_pp_positive = c_post_pickle[POS_LABEL]
            c_pp_negative = c_post_pickle[NEG_LABEL]
            c_e_positive = c_expected[POS_LABEL]
            c_e_negative = c_expected[NEG_LABEL]
            ntools.assert_almost_equal(c_e_positive, c_pp_positive, 5)
            ntools.assert_almost_equal(c_e_negative, c_pp_negative, 5)

示例#7

0

显示文件

        def test_simple_multiclass_classification(self):
            """
            simple LibSvmClassifier test - 3-class

            Test libSVM classification functionality using random constructed
            data, training the y=0.33 and y=.66 split
            """
            DIM = 2
            N = 1000
            P1_LABEL = 'p1'
            P2_LABEL = 'p2'
            P3_LABEL = 'p3'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(
                MemoryClassificationElement, {})
            di = 0

            def make_element(argtup):
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_p3 = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_p3 = p.map(make_element, enumerate(x_p3, di))
            di += len(d_p3)

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': ''  # quite mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2, P3_LABEL: d_p3})

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_p3 = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_p3 = p.map(make_element, enumerate(x_p3, di))
            di += len(d_p3)

            d_p1_sync = {}
            for d in d_p1:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), P1_LABEL, "Incorrect %s label: %s :: %s" %
                    (P1_LABEL, d.vector(), c.get_classification()))
                d_p1_sync[d] = c

            d_p2_sync = {}
            for d in d_p2:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), P2_LABEL, "Incorrect %s label: %s :: %s" %
                    (P2_LABEL, d.vector(), c.get_classification()))
                d_p2_sync[d] = c

            d_neg_sync = {}
            for d in d_p3:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), P3_LABEL, "Incorrect %s label: %s :: %s" %
                    (P3_LABEL, d.vector(), c.get_classification()))
                d_neg_sync[d] = c

            # test that async classify produces the same results
            # -- p1
            async_p1 = classifier.classify_async(d_p1, c_factory)
            ntools.assert_equal(
                async_p1, d_p1_sync,
                "Async computation of p1 set did not yield "
                "the same results as synchronous computation.")
            # -- p2
            async_p2 = classifier.classify_async(d_p2, c_factory)
            ntools.assert_equal(
                async_p2, d_p2_sync,
                "Async computation of p2 set did not yield "
                "the same results as synchronous computation.")
            # -- neg
            async_neg = classifier.classify_async(d_p3, c_factory)
            ntools.assert_equal(
                async_neg, d_neg_sync,
                "Async computation of neg set did not yield "
                "the same results as synchronous computation.")
            # -- combined -- threaded
            sync_combined = dict(d_p1_sync.items())
            sync_combined.update(d_p2_sync)
            sync_combined.update(d_neg_sync)
            async_combined = classifier.classify_async(
                d_p1 + d_p2 + d_p3, c_factory, use_multiprocessing=False)
            ntools.assert_equal(
                async_combined, sync_combined,
                "Async computation of all test descriptors "
                "did not yield the same results as "
                "synchronous classification.")
            # -- combined -- multiprocess
            async_combined = classifier.classify_async(
                d_p1 + d_p2 + d_p3, c_factory, use_multiprocessing=True)
            ntools.assert_equal(
                async_combined, sync_combined,
                "Async computation of all test descriptors "
                "(mixed order) did not yield the same results "
                "as synchronous classification.")

            # Closing resources
            p.close()
            p.join()

示例#8

0

显示文件

from smqtk.representation import ClassificationElementFactory
from smqtk.representation.classification_element.memory import \
    MemoryClassificationElement


# Default classifier element factory for interfaces.
DFLT_CLASSIFIER_FACTORY = ClassificationElementFactory(
    MemoryClassificationElement, {}
)

示例#9

0

显示文件

文件： test_libsvm.py 项目： msarahan/SMQTK

        def test_simple_multiclass_classification(self):
            """
            Test libSVM classification functionality using random constructed
            data, training the y=0.33 and y=.66 split
            """
            DIM = 2
            N = 1000
            P1_LABEL = 'p1'
            P2_LABEL = 'p2'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(MemoryClassificationElement, {})
            di = 0

            def make_element((i, v)):
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_neg = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_neg = p.map(make_element, enumerate(x_neg, di))
            di += len(d_neg)

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': ''  # quite mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2}, d_neg)

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_neg = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_neg = p.map(make_element, enumerate(x_neg, di))
            di += len(d_neg)

            for d in d_p1:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    P1_LABEL,
                                    "Incorrect %s label: %s :: %s" %
                                    (P1_LABEL, d.vector(),
                                     c.get_classification()))
            for d in d_p2:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    P2_LABEL,
                                    "Incorrect %s label: %s :: %s" %
                                    (P2_LABEL, d.vector(),
                                     c.get_classification()))
            for d in d_neg:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    LibSvmClassifier.NEGATIVE_LABEL,
                                    "Incorrect %s label: %s :: %s" %
                                    (LibSvmClassifier.NEGATIVE_LABEL,
                                     d.vector(),
                                     c.get_classification()))

            # Closing resources
            p.close()
            p.join()

示例#10

0

显示文件

文件： test_libsvm.py 项目： msarahan/SMQTK

        def test_simple_classification(self):
            """
            Test libSVM classification functionality using random constructed
            data, training the y=0.5 split
            """
            DIM = 2
            N = 1000
            POS_LABEL = 'positive'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(MemoryClassificationElement, {})

            def make_element((i, v)):
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N//2))

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': '',  # quite mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({POS_LABEL: d_pos}, d_neg)

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos, N))
            d_neg = p.map(make_element, enumerate(x_neg, N + N//2))

            for d in d_pos:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    POS_LABEL,
                                    "Found False positive: %s :: %s" %
                                    (d.vector(), c.get_classification()))
            for d in d_neg:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    LibSvmClassifier.NEGATIVE_LABEL,
                                    "Found False negative: %s :: %s" %
                                    (d.vector(), c.get_classification()))

            # Closing resources
            p.close()
            p.join()

示例#11

0

显示文件

def classifier_kfold_validation():
    args = cli_parser().parse_args()
    config = cli.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    #
    # Load configurations / Setup data
    #
    pr_enabled = config['pr_curves']['enabled']
    pr_output_dir = config['pr_curves']['output_directory']
    pr_file_prefix = config['pr_curves']['file_prefix'] or ''
    pr_show = config['pr_curves']['show']

    roc_enabled = config['roc_curves']['enabled']
    roc_output_dir = config['roc_curves']['output_directory']
    roc_file_prefix = config['roc_curves']['file_prefix'] or ''
    roc_show = config['roc_curves']['show']

    log.info("Initializing DescriptorSet (%s)",
             config['plugins']['descriptor_set']['type'])
    #: :type: smqtk.representation.DescriptorSet
    descriptor_set = from_config_dict(config['plugins']['descriptor_set'],
                                      DescriptorSet.get_impls())
    log.info("Loading classifier configuration")
    #: :type: dict
    classifier_config = config['plugins']['supervised_classifier']

    # Always use in-memory ClassificationElement since we are retraining the
    # classifier and don't want possible element caching
    #: :type: ClassificationElementFactory
    classification_factory = ClassificationElementFactory(
        MemoryClassificationElement, {})

    log.info("Loading truth data")
    #: :type: list[str]
    uuids = []
    #: :type: list[str]
    truth_labels = []
    with open(config['cross_validation']['truth_labels']) as f:
        f_csv = csv.reader(f)
        for row in f_csv:
            uuids.append(row[0])
            truth_labels.append(row[1])
    #: :type: numpy.ndarray[str]
    uuids = numpy.array(uuids)
    #: :type: numpy.ndarray[str]
    truth_labels = numpy.array(truth_labels)

    #
    # Cross validation
    #
    kfolds = sklearn.model_selection.StratifiedKFold(
        n_splits=config['cross_validation']['num_folds'],
        shuffle=True,
        random_state=config['cross_validation']['random_seed'],
    ).split(numpy.zeros(len(truth_labels)), truth_labels)
    """
    Truth and classification probability results for test data per fold.
    Format:
        {
            0: {
                '<label>':  {
                    "truth": [...],   # Parallel truth and classification
                    "proba": [...],   # probability values
                },
                ...
            },
            ...
        }
    """
    fold_data: Dict[int, Any] = {}

    i = 0
    for train, test in kfolds:
        log.info("Fold %d", i)
        log.info("-- %d training examples", len(train))
        log.info("-- %d test examples", len(test))
        fold_data[i] = {}

        log.info("-- creating classifier")
        classifier = cast(
            SupervisedClassifier,
            from_config_dict(classifier_config,
                             SupervisedClassifier.get_impls()))

        log.info("-- gathering descriptors")
        pos_map: Dict[str, List[DescriptorElement]] = {}
        for idx in train:
            if truth_labels[idx] not in pos_map:
                pos_map[truth_labels[idx]] = []
            pos_map[truth_labels[idx]].append(
                descriptor_set.get_descriptor(uuids[idx]))

        log.info("-- Training classifier")
        classifier.train(pos_map)

        log.info("-- Classifying test set")
        c_iter = classifier.classify_elements(
            (descriptor_set.get_descriptor(uuids[idx]) for idx in test),
            classification_factory,
        )
        uuid2c = dict((c.uuid, c.get_classification()) for c in c_iter)

        log.info("-- Pairing truth and computed probabilities")
        # Only considering positive labels
        for t_label in pos_map:
            fold_data[i][t_label] = {
                "truth": [L == t_label for L in truth_labels[test]],
                "proba": [uuid2c[uuid][t_label] for uuid in uuids[test]]
            }

        i += 1

    #
    # Curve generation
    #
    if pr_enabled:
        make_pr_curves(fold_data, pr_output_dir, pr_file_prefix, pr_show)
    if roc_enabled:
        make_roc_curves(fold_data, roc_output_dir, roc_file_prefix, roc_show)