def test_get_config(self):
     """
     We should be able to get the configuration of the current factory.
     This should match the element type and construction parameters the
     factory was created with.
     """
     test_params = {'p1': 'some dir', 'vec': 1}
     factory = DescriptorElementFactory(DummyElementImpl, test_params)
     factory_config = factory.get_config()
     assert factory_config == {
         "type": "DummyElementImpl",
         "DummyElementImpl": test_params
     }
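
A short follow-on sketch, assuming DescriptorElementFactory follows SMQTK's usual Configurable pattern and exposes a matching from_config classmethod: a factory rebuilt from its own reported configuration should report that same configuration again.

# Hedged sketch: from_config is assumed to mirror get_config().
rebuilt = DescriptorElementFactory.from_config(factory_config)
assert rebuilt.get_config() == factory_config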
Example #2
def compute_descriptors(task, folderId, dataElementUris, **kwargs):
    """
    Celery task for computing descriptors for a series of data element URIs
    belonging to a single folder.

    After computing descriptors for a series of Girder files, the relevant items
    are updated within Girder to contain the smqtk_uuid (sha1) value as metadata.

    :param task: Celery provided task object.
    :param folderId: The folder these images are related to; it is used to
        namespace the descriptor index table.
    :param dataElementUris: A list of (itemId, URI) pairs; the URIs are
        assumed to be GirderDataElement URIs.
    """
    task.job_manager.updateProgress(message='Computing descriptors',
                                    forceFlush=True)
    generator = CaffeDescriptorGenerator(
        girderUriFromTask(
            task, getSetting(task.girder_client, 'caffe_network_prototxt')),
        girderUriFromTask(
            task, getSetting(task.girder_client, 'caffe_network_model')),
        girderUriFromTask(task,
                          getSetting(task.girder_client, 'caffe_image_mean')))

    factory = DescriptorElementFactory(
        PostgresDescriptorElement, {
            'db_name': getSetting(task.girder_client, 'db_name'),
            'db_host': getSetting(task.girder_client, 'db_host'),
            'db_user': getSetting(task.girder_client, 'db_user'),
            'db_pass': getSetting(task.girder_client, 'db_pass')
        })

    index = descriptorIndexFromFolderId(task.girder_client, folderId)

    valid_elements = iter_valid_elements([x[1] for x in dataElementUris],
                                         generator.valid_content_types())

    descriptors = compute_functions.compute_many_descriptors(valid_elements,
                                                             generator,
                                                             factory,
                                                             index,
                                                             use_mp=False)

    fileToItemId = {y.split('/')[-1]: x for x, y in dataElementUris}

    for de, descriptor in descriptors:
        # TODO Catch errors that could occur here
        with task.girder_client.session():
            task.girder_client.addMetadataToItem(
                fileToItemId[de.file_id], {'smqtk_uuid': descriptor.uuid()})
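
From the indexing in the loop above, dataElementUris is expected to be a sequence of (itemId, URI) pairs rather than bare URI strings. A hypothetical illustration of that shape (the ids and the URI form below are placeholders, not real Girder values):

# Placeholder values only; real URIs come from GirderDataElement instances.
dataElementUris = [
    ('girder-item-id-1', 'girder://.../file/girder-file-id-1'),
    ('girder-item-id-2', 'girder://.../file/girder-file-id-2'),
]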
Example #3
def train_classifier_iqr(config, iqr_state_fp):
    #: :type: smqtk.algorithms.SupervisedClassifier
    classifier = from_config_dict(config['classifier'],
                                  SupervisedClassifier.get_impls())

    # Load state into an empty IqrSession instance.
    with open(iqr_state_fp, 'rb') as f:
        state_bytes = f.read().strip()
    descr_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    iqrs = IqrSession()
    iqrs.set_state_bytes(state_bytes, descr_factory)

    # Positive descriptor examples for training are composed of those from
    # external and internal sets. Same for negative descriptor examples.
    pos = iqrs.positive_descriptors | iqrs.external_positive_descriptors
    neg = iqrs.negative_descriptors | iqrs.external_negative_descriptors
    classifier.train(class_examples={'positive': pos, 'negative': neg})
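
The config['classifier'] block consumed by from_config_dict above uses the same type-keyed layout seen in the other examples. A hedged invocation sketch; the classifier parameters and the state file path are placeholders for illustration only:

# Placeholder configuration and path, not values from the original script.
config = {
    'classifier': {
        'type': 'LibSvmClassifier',
        'LibSvmClassifier': {'normalize': None},
    },
}
train_classifier_iqr(config, '/path/to/iqr_state_file')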
    def test_with_params(self):
        v = numpy.random.randint(0, 10, 10)
        test_params = {'p1': 'some dir', 'vec': v}

        factory = DescriptorElementFactory(DummyElementImpl, test_params)

        ex_type = 'type'
        ex_uuid = 'uuid'
        ex_args = ()
        ex_kwds = test_params
        # Should construct a new DEI instance under the hood somewhere
        r = factory.new_descriptor(ex_type, ex_uuid)

        ntools.assert_is_instance(r, DummyElementImpl)
        ntools.assert_equal(r._type_label, ex_type)
        ntools.assert_equal(r._uuid, ex_uuid)
        ntools.assert_equal(r.args, ex_args)
        ntools.assert_equal(r.kwds, ex_kwds)
    def test_no_params(self):
        test_params = {}

        factory = DescriptorElementFactory(DummyElementImpl, test_params)

        expected_type = 'type'
        expected_uuid = 'uuid'
        expected_args = ()
        expected_kwds = {}

        # Should construct a new DEI instance under the hood somewhere
        r = factory.new_descriptor(expected_type, expected_uuid)

        ntools.assert_is_instance(r, DummyElementImpl)
        ntools.assert_equal(r._type_label, expected_type)
        ntools.assert_equal(r._uuid, expected_uuid)
        ntools.assert_equal(r.args, expected_args)
        ntools.assert_equal(r.kwds, expected_kwds)
    def test_call(self):
        # Same as `test_with_params` but using __call__ entry point
        v = numpy.random.randint(0, 10, 10)
        test_params = {'p1': 'some dir', 'vec': v}

        factory = DescriptorElementFactory(DummyElementImpl, test_params)

        ex_type = 'type'
        ex_uuid = 'uuid'
        ex_args = ()
        ex_kwds = test_params
        # Should construct a new DEI instance under the hood somewhere
        r = factory(ex_type, ex_uuid)

        self.assertIsInstance(r, DummyElementImpl)
        self.assertEqual(r._type_label, ex_type)
        self.assertEqual(r._uuid, ex_uuid)
        self.assertEqual(r.args, ex_args)
        self.assertEqual(r.kwds, ex_kwds)
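
The three tests above rely on a DummyElementImpl test double that is not shown in this listing. A minimal reconstruction sketch inferred from the attributes the assertions inspect; the real fixture may differ, and the stubbed methods below are only assumptions made to satisfy DescriptorElement's abstract interface:

from smqtk.representation import DescriptorElement


class DummyElementImpl (DescriptorElement):
    # Hypothetical test double: records extra constructor args/kwargs so the
    # factory's parameter forwarding can be inspected by the tests.
    def __init__(self, type_str, uuid, *args, **kwds):
        super(DummyElementImpl, self).__init__(type_str, uuid)
        self.args = args
        self.kwds = kwds

    # Minimal stubs for the abstract interface (assumed, not from the source).
    def get_config(self):
        return self.kwds

    def has_vector(self):
        return False

    def vector(self):
        return None

    def set_vector(self, new_vec):
        return self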
Example #7
        def test_simple_classification(self):
            """
            simple LibSvmClassifier test - 2-class

            Test libSVM classification functionality using randomly
            constructed data, training on the y=0.5 split.
            """
            DIM = 2
            N = 1000
            POS_LABEL = 'positive'
            NEG_LABEL = 'negative'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(
                MemoryClassificationElement, {})

            def make_element(argtup):
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': '',  # quiet mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos, N))
            d_neg = p.map(make_element, enumerate(x_neg, N + N // 2))

            d_pos_sync = {}  # for comparing to async
            for d in d_pos:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), POS_LABEL,
                    "Found False positive: %s :: %s" %
                    (d.vector(), c.get_classification()))
                d_pos_sync[d] = c

            d_neg_sync = {}
            for d in d_neg:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), NEG_LABEL,
                    "Found False negative: %s :: %s" %
                    (d.vector(), c.get_classification()))
                d_neg_sync[d] = c

            # test that async classify produces the same results
            # -- d_pos
            m_pos = classifier.classify_async(d_pos, c_factory)
            ntools.assert_equal(
                m_pos, d_pos_sync,
                "Async computation of pos set did not yield "
                "the same results as synchronous "
                "classification.")
            # -- d_neg
            m_neg = classifier.classify_async(d_neg, c_factory)
            ntools.assert_equal(
                m_neg, d_neg_sync,
                "Async computation of neg set did not yield "
                "the same results as synchronous "
                "classification.")
            # -- combined -- threaded
            combined_truth = dict(d_pos_sync.items())
            combined_truth.update(d_neg_sync)
            m_combined = classifier.classify_async(
                d_pos + d_neg,
                c_factory,
                use_multiprocessing=False,
            )
            ntools.assert_equal(
                m_combined, combined_truth,
                "Async computation of all test descriptors "
                "did not yield the same results as "
                "synchronous classification.")
            # -- combined -- multiprocess
            m_combined = classifier.classify_async(
                d_pos + d_neg,
                c_factory,
                use_multiprocessing=True,
            )
            ntools.assert_equal(
                m_combined, combined_truth,
                "Async computation of all test descriptors "
                "(mixed order) did not yield the same results "
                "as synchronous classification.")

            # Closing resources
            p.close()
            p.join()
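
Note that this test draws fresh random data on every run. If repeatable behaviour is wanted, the random draws can be pinned beforehand; this is a sketch of an optional addition, not something the original test does:

numpy.random.seed(0)  # make the random train/test splits reproducible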
Example #8
        def test_no_save_model_pickle(self):
            # Test model preservation across pickling even without model cache
            # file paths set.
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': '',  # quiet mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            ntools.assert_true(classifier.svm_model is None)
            # Empty model should not trigger __LOCAL__ content in pickle
            ntools.assert_not_in('__LOCAL__', classifier.__getstate__())
            _ = cPickle.loads(cPickle.dumps(classifier))

            # train arbitrary model (same as ``test_simple_classification``)
            DIM = 2
            N = 1000
            POS_LABEL = 'positive'
            NEG_LABEL = 'negative'
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(
                MemoryClassificationElement, {})

            def make_element(argtup):
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]
            p = multiprocessing.pool.ThreadPool()
            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))
            p.close()
            p.join()

            # Training
            classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

            # Test original classifier
            t_v = numpy.random.rand(DIM)
            t = d_factory.new_descriptor('query', 0)
            t.set_vector(t_v)
            c_expected = classifier.classify(t, c_factory)

            # Should see __LOCAL__ content in pickle state now
            p_state = classifier.__getstate__()
            ntools.assert_in('__LOCAL__', p_state)
            ntools.assert_in('__LOCAL_LABELS__', p_state)
            ntools.assert_in('__LOCAL_MODEL__', p_state)
            ntools.assert_true(len(p_state['__LOCAL_LABELS__']) > 0)
            ntools.assert_true(len(p_state['__LOCAL_MODEL__']) > 0)

            # The restored classifier should classify the same test
            # descriptor the same way.
            #: :type: LibSvmClassifier
            classifier2 = cPickle.loads(cPickle.dumps(classifier))
            c_post_pickle = classifier2.classify(t, c_factory)
            # There may be floating-point error, so extract the actual
            # confidence values and compare them to a fixed precision.
            c_pp_positive = c_post_pickle[POS_LABEL]
            c_pp_negative = c_post_pickle[NEG_LABEL]
            c_e_positive = c_expected[POS_LABEL]
            c_e_negative = c_expected[NEG_LABEL]
            ntools.assert_almost_equal(c_e_positive, c_pp_positive, 5)
            ntools.assert_almost_equal(c_e_negative, c_pp_negative, 5)
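
Building on what the test verifies (the trained model travels inside the pickle payload even with no cache file paths configured), a minimal persistence sketch; the file path is a placeholder:

# Placeholder path; any writable location works.
with open('/tmp/libsvm_classifier.pkl', 'wb') as f:
    cPickle.dump(classifier, f)
with open('/tmp/libsvm_classifier.pkl', 'rb') as f:
    restored = cPickle.load(f)
assert isinstance(restored, LibSvmClassifier)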
Example #9
        def test_simple_multiclass_classification(self):
            """
            simple LibSvmClassifier test - 3-class

            Test libSVM classification functionality using randomly
            constructed data, training on the y=0.33 and y=0.66 splits.
            """
            DIM = 2
            N = 1000
            P1_LABEL = 'p1'
            P2_LABEL = 'p2'
            P3_LABEL = 'p3'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(
                MemoryClassificationElement, {})
            di = 0

            def make_element(argtup):
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_p3 = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_p3 = p.map(make_element, enumerate(x_p3, di))
            di += len(d_p3)

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': ''  # quiet mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2, P3_LABEL: d_p3})

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_p3 = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_p3 = p.map(make_element, enumerate(x_p3, di))
            di += len(d_p3)

            d_p1_sync = {}
            for d in d_p1:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), P1_LABEL, "Incorrect %s label: %s :: %s" %
                    (P1_LABEL, d.vector(), c.get_classification()))
                d_p1_sync[d] = c

            d_p2_sync = {}
            for d in d_p2:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), P2_LABEL, "Incorrect %s label: %s :: %s" %
                    (P2_LABEL, d.vector(), c.get_classification()))
                d_p2_sync[d] = c

            d_neg_sync = {}
            for d in d_p3:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(
                    c.max_label(), P3_LABEL, "Incorrect %s label: %s :: %s" %
                    (P3_LABEL, d.vector(), c.get_classification()))
                d_neg_sync[d] = c

            # test that async classify produces the same results
            # -- p1
            async_p1 = classifier.classify_async(d_p1, c_factory)
            ntools.assert_equal(
                async_p1, d_p1_sync,
                "Async computation of p1 set did not yield "
                "the same results as synchronous computation.")
            # -- p2
            async_p2 = classifier.classify_async(d_p2, c_factory)
            ntools.assert_equal(
                async_p2, d_p2_sync,
                "Async computation of p2 set did not yield "
                "the same results as synchronous computation.")
            # -- neg
            async_neg = classifier.classify_async(d_p3, c_factory)
            ntools.assert_equal(
                async_neg, d_neg_sync,
                "Async computation of neg set did not yield "
                "the same results as synchronous computation.")
            # -- combined -- threaded
            sync_combined = dict(d_p1_sync.items())
            sync_combined.update(d_p2_sync)
            sync_combined.update(d_neg_sync)
            async_combined = classifier.classify_async(
                d_p1 + d_p2 + d_p3, c_factory, use_multiprocessing=False)
            ntools.assert_equal(
                async_combined, sync_combined,
                "Async computation of all test descriptors "
                "did not yield the same results as "
                "synchronous classification.")
            # -- combined -- multiprocess
            async_combined = classifier.classify_async(
                d_p1 + d_p2 + d_p3, c_factory, use_multiprocessing=True)
            ntools.assert_equal(
                async_combined, sync_combined,
                "Async computation of all test descriptors "
                "(mixed order) did not yield the same results "
                "as synchronous classification.")

            # Closing resources
            p.close()
            p.join()
Example #10
import abc
import numpy

from smqtk.algorithms import SmqtkAlgorithm
from smqtk.representation import DescriptorElementFactory
from smqtk.representation.descriptor_element.local_elements import \
    DescriptorMemoryElement
from smqtk.utils import ContentTypeValidator
from smqtk.utils.parallel import parallel_map


DFLT_DESCRIPTOR_FACTORY = DescriptorElementFactory(DescriptorMemoryElement, {})


class DescriptorGenerator (SmqtkAlgorithm, ContentTypeValidator):
    """
    Base abstract Feature Descriptor interface
    """

    def compute_descriptor(self, data, descr_factory=DFLT_DESCRIPTOR_FACTORY,
                           overwrite=False):
        """
        Given some data, return a descriptor element containing a descriptor
        vector.

        :raises RuntimeError: Descriptor extraction failure of some kind.
        :raises ValueError: Given data element content was not of a valid type
            with respect to this descriptor.

        :param data: Some kind of input data for the feature descriptor.
        :type data: smqtk.representation.DataElement
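
A hedged usage sketch for the interface above; SomeDescriptorGenerator stands in for any concrete implementation (for example the Caffe generator used earlier), and the DataFileElement import path and image path are assumptions for illustration only:

from smqtk.representation.data_element.file_element import DataFileElement

data = DataFileElement('/path/to/image.png')   # placeholder input element
generator = SomeDescriptorGenerator()          # placeholder concrete impl
descr = generator.compute_descriptor(data, DFLT_DESCRIPTOR_FACTORY)
print(descr.uuid(), descr.vector())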
file_element_config = {
    'save_dir': ROOT_DIR,
    'subdir_split': 10,
}

psql_element_config = {
    'db_name': 'smqtk',
    'db_host': 'localhost',
    'db_port': 6432,  # PgBouncer port
    'db_user': '******',
    'db_pass': '******',
}

file_element_factory = DescriptorElementFactory(
    DescriptorFileElement,
    file_element_config,
)

psql_element_factory = DescriptorElementFactory(
    PostgresDescriptorElement,
    psql_element_config,
)

fname_re = re.compile(r'(\w+)\.(\w+)\.vector\.npy')


def transfer_vector(type_str, uuid_str):
    pd = psql_element_factory(type_str, uuid_str)
    if not pd.has_vector():
        fd = file_element_factory(type_str, uuid_str)
        # removing the "-0" artifacts
Example #12
import logging
import multiprocessing
import multiprocessing.pool
import os.path as osp
import shutil
import uuid

from smqtk.algorithms.relevancy_index import get_relevancy_index_impls
from smqtk.representation import DescriptorElementFactory
from smqtk.representation.descriptor_element.local_elements import DescriptorMemoryElement
from smqtk.representation.descriptor_index.memory import DescriptorMemoryIndex
from smqtk.utils import SmqtkObject
from smqtk.utils import plugin
from smqtk.utils import file_utils

DFLT_MEMORY_DESCR_FACTORY = DescriptorElementFactory(DescriptorMemoryElement,
                                                     {})
DFLT_REL_INDEX_CONFIG = {
    "type": "LibSvmHikRelevancyIndex",
    "LibSvmHikRelevancyIndex": {
        "descr_cache_filepath": None,
    }
}


class IqrResultsDict(dict):
    """
    Dictionary subclass for containing DescriptorElement-to-float mapping.

    Keys are expected to be DescriptorElement instances and values to be
    floats in the range [0, 1], inclusive.
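
A minimal sketch of the mapping the docstring describes, assuming DescriptorElement instances hash by their UUID as in SMQTK; the score value is illustrative:

results = IqrResultsDict()
d = DFLT_MEMORY_DESCR_FACTORY('some_type', 'some_uuid')
results[d] = 0.87  # relevancy score in [0, 1]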
Example #13
        def test_simple_multiclass_classification(self):
            """
            Test libSVM classification functionality using randomly
            constructed data, training on the y=0.33 and y=0.66 splits.
            """
            DIM = 2
            N = 1000
            P1_LABEL = 'p1'
            P2_LABEL = 'p2'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(MemoryClassificationElement, {})
            di = 0

            def make_element(argtup):
                # Python 3: unpack inside the body (tuple-parameter unpacking
                # in the signature is Python 2-only syntax).
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_neg = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_neg = p.map(make_element, enumerate(x_neg, di))
            di += len(d_neg)

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': ''  # quiet mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2}, d_neg)

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_p1 = x[x[:, 1] <= 0.30]
            x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
            x_neg = x[x[:, 1] >= 0.69]

            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_neg = p.map(make_element, enumerate(x_neg, di))
            di += len(d_neg)

            for d in d_p1:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    P1_LABEL,
                                    "Incorrect %s label: %s :: %s" %
                                    (P1_LABEL, d.vector(),
                                     c.get_classification()))
            for d in d_p2:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    P2_LABEL,
                                    "Incorrect %s label: %s :: %s" %
                                    (P2_LABEL, d.vector(),
                                     c.get_classification()))
            for d in d_neg:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    LibSvmClassifier.NEGATIVE_LABEL,
                                    "Incorrect %s label: %s :: %s" %
                                    (LibSvmClassifier.NEGATIVE_LABEL,
                                     d.vector(),
                                     c.get_classification()))

            # Closing resources
            p.close()
            p.join()
Example #14
        def test_simple_classification(self):
            """
            Test libSVM classification functionality using randomly
            constructed data, training on the y=0.5 split.
            """
            DIM = 2
            N = 1000
            POS_LABEL = 'positive'
            p = multiprocessing.pool.ThreadPool()
            d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
            c_factory = ClassificationElementFactory(MemoryClassificationElement, {})

            def make_element(argtup):
                # Python 3: unpack inside the body instead of the signature.
                (i, v) = argtup
                d = d_factory.new_descriptor('test', i)
                d.set_vector(v)
                return d

            # Constructing artificial descriptors
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N//2))

            # Create/Train test classifier
            classifier = LibSvmClassifier(
                train_params={
                    '-t': 0,  # linear kernel
                    '-b': 1,  # enable probability estimates
                    '-c': 2,  # SVM-C parameter C
                    '-q': '',  # quiet mode
                },
                normalize=None,  # DO NOT normalize descriptors
            )
            classifier.train({POS_LABEL: d_pos}, d_neg)

            # Test classifier
            x = numpy.random.rand(N, DIM)
            x_pos = x[x[:, 1] <= 0.45]
            x_neg = x[x[:, 1] >= 0.55]

            d_pos = p.map(make_element, enumerate(x_pos, N))
            d_neg = p.map(make_element, enumerate(x_neg, N + N//2))

            for d in d_pos:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    POS_LABEL,
                                    "Found False positive: %s :: %s" %
                                    (d.vector(), c.get_classification()))
            for d in d_neg:
                c = classifier.classify(d, c_factory)
                ntools.assert_equal(c.max_label(),
                                    LibSvmClassifier.NEGATIVE_LABEL,
                                    "Found False negative: %s :: %s" %
                                    (d.vector(), c.get_classification()))

            # Closing resources
            p.close()
            p.join()
Example #15
    def test_simple_multiclass_classification(self):
        """
        simple LibSvmClassifier test - 3-class

        Test libSVM classification functionality using randomly constructed
        data, training on the y=0.33 and y=0.66 splits.
        """
        DIM = 2
        N = 1000
        P1_LABEL = 'p1'
        P2_LABEL = 'p2'
        P3_LABEL = 'p3'
        p = multiprocessing.pool.ThreadPool()
        d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
        di = 0

        def make_element(iv):
            i, v = iv
            elem = d_factory.new_descriptor('test', i)
            elem.set_vector(v)
            return elem

        # Constructing artificial descriptors
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_p3 = x[x[:, 1] >= 0.69]

        d_p1 = p.map(make_element, enumerate(x_p1, di))
        di += len(d_p1)
        d_p2 = p.map(make_element, enumerate(x_p2, di))
        di += len(d_p2)
        d_p3 = p.map(make_element, enumerate(x_p3, di))
        di += len(d_p3)

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': ''  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2, P3_LABEL: d_p3})

        # Test classifier
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_p3 = x[x[:, 1] >= 0.69]

        # Test that examples expected to classify to certain classes are.
        c_map_p1 = list(classifier._classify_arrays(x_p1))
        for v, c_map in zip(x_p1, c_map_p1):
            assert c_map[P1_LABEL] > max(c_map[P2_LABEL], c_map[P3_LABEL]), \
                "Incorrect {} label: {} :: {}".format(P1_LABEL, v, c_map)

        c_map_p2 = list(classifier._classify_arrays(x_p2))
        for v, c_map in zip(x_p2, c_map_p2):
            assert c_map[P2_LABEL] > max(c_map[P1_LABEL], c_map[P3_LABEL]), \
                "Incorrect {} label: {} :: {}".format(P2_LABEL, v, c_map)

        c_map_p3 = list(classifier._classify_arrays(x_p3))
        for v, c_map in zip(x_p3, c_map_p3):
            assert c_map[P3_LABEL] > max(c_map[P1_LABEL], c_map[P2_LABEL]), \
                "Incorrect {} label: {} :: {}".format(P3_LABEL, v, c_map)

        # Closing resources
        p.close()
        p.join()
Example #16
    def test_simple_classification(self):
        """
        simple LibSvmClassifier test - 2-class

        Test libSVM classification functionality using randomly constructed
        data, training on the y=0.5 split.
        """
        DIM = 2
        N = 1000
        POS_LABEL = 'positive'
        NEG_LABEL = 'negative'
        p = multiprocessing.pool.ThreadPool()
        d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

        def make_element(iv):
            i, v = iv
            elem = d_factory.new_descriptor('test', i)
            elem.set_vector(v)
            return elem

        # Constructing artificial descriptors
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]

        d_pos = p.map(make_element, enumerate(x_pos))
        d_neg = p.map(make_element, enumerate(x_neg, start=N//2))

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': '',  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

        # Test classifier
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]

        # Test that examples expected to classify to the positive class are,
        # and same for those expected to be in the negative class.
        c_map_pos = list(classifier._classify_arrays(x_pos))
        for v, c_map in zip(x_pos, c_map_pos):
            assert c_map[POS_LABEL] > c_map[NEG_LABEL], \
                "Found False positive: {} :: {}" \
                .format(v, c_map)

        c_map_neg = list(classifier._classify_arrays(x_neg))
        for v, c_map in zip(x_neg, c_map_neg):
            assert c_map[NEG_LABEL] > c_map[POS_LABEL], \
                "Found False negative: {} :: {}" \
                .format(v, c_map)

        # Closing resources
        p.close()
        p.join()