def test_get_config(self):
    """
    A factory must be able to report its own configuration.

    The value returned by ``get_config`` is expected to be a dictionary
    whose "type" entry is the string "DummyElementImpl" and whose
    "DummyElementImpl" entry equals the parameters the factory was
    constructed with.
    """
    impl_params = {'p1': 'some dir', 'vec': 1}
    expected_config = {
        "type": "DummyElementImpl",
        "DummyElementImpl": impl_params,
    }

    element_factory = DescriptorElementFactory(DummyElementImpl, impl_params)
    actual_config = element_factory.get_config()

    assert actual_config == expected_config
def compute_descriptors(task, folderId, dataElementUris, **kwargs):
    """
    Celery task computing descriptors for data elements of a single folder.

    Once descriptors have been computed for the given Girder files, the
    related Girder items are tagged with the descriptor's UUID under the
    ``smqtk_uuid`` metadata key.

    :param task: Celery provided task object.
    :param folderId: The folder these images are related to, this is used
        for namespacing the descriptor index table.
    :param dataElementUris: A list of 2-element entries. The second member
        of each entry is a data element URI (assumed to be a
        GirderDataElement URI); the first member is the id of the item that
        receives the metadata for that URI.
    """
    task.job_manager.updateProgress(message='Computing descriptors',
                                    forceFlush=True)

    girder = task.girder_client

    # The generator takes, in order: network prototxt, network model and
    # image mean, each resolved from a Girder setting into a URI.
    caffe_uris = [
        girderUriFromTask(task, getSetting(girder, setting_name))
        for setting_name in ('caffe_network_prototxt',
                             'caffe_network_model',
                             'caffe_image_mean')
    ]
    generator = CaffeDescriptorGenerator(*caffe_uris)

    # Database connection parameters share their names with the settings
    # they are read from.
    db_params = {
        setting_name: getSetting(girder, setting_name)
        for setting_name in ('db_name', 'db_host', 'db_user', 'db_pass')
    }
    factory = DescriptorElementFactory(PostgresDescriptorElement, db_params)

    index = descriptorIndexFromFolderId(girder, folderId)

    uris = [entry[1] for entry in dataElementUris]
    valid_elements = iter_valid_elements(uris,
                                         generator.valid_content_types())

    descriptors = compute_functions.compute_many_descriptors(
        valid_elements, generator, factory, index, use_mp=False)

    # Last path component of the URI -> item id to tag.
    fileToItemId = {
        uri.split('/')[-1]: item_id for item_id, uri in dataElementUris
    }

    for element, descriptor in descriptors:
        # TODO Catch errors that could occur here
        with girder.session():
            girder.addMetadataToItem(
                fileToItemId[element.file_id],
                {'smqtk_uuid': descriptor.uuid()})
def train_classifier_iqr(config, iqr_state_fp):
    """
    Train a supervised classifier from the examples stored in an IQR state.

    :param config: Configuration dictionary; its 'classifier' entry is used
        to construct the classifier instance.
    :param iqr_state_fp: Path to the file holding the IqrSession state
        bytes.
    """
    #: :type: smqtk.algorithms.SupervisedClassifier
    classifier = from_config_dict(config['classifier'],
                                  SupervisedClassifier.get_impls())

    # Restore the saved state into a fresh IqrSession.
    with open(iqr_state_fp, 'rb') as state_file:
        serialized_state = state_file.read().strip()
    memory_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    session = IqrSession()
    session.set_state_bytes(serialized_state, memory_factory)

    # Each training class is the union of the session's internal and
    # external descriptor sets for that class.
    examples = {
        'positive': (session.positive_descriptors |
                     session.external_positive_descriptors),
        'negative': (session.negative_descriptors |
                     session.external_negative_descriptors),
    }
    classifier.train(class_examples=examples)
def test_with_params(self):
    """
    ``new_descriptor`` must build a DummyElementImpl carrying the given type
    label and UUID, with no positional arguments and with the factory's
    configured parameters as keyword arguments.
    """
    vec = numpy.random.randint(0, 10, 10)
    impl_params = {'p1': 'some dir', 'vec': vec}
    element_factory = DescriptorElementFactory(DummyElementImpl, impl_params)

    type_label = 'type'
    uid = 'uuid'

    # The factory is expected to instantiate the implementation class
    # under the hood.
    element = element_factory.new_descriptor(type_label, uid)

    ntools.assert_is_instance(element, DummyElementImpl)
    ntools.assert_equal(element._type_label, type_label)
    ntools.assert_equal(element._uuid, uid)
    ntools.assert_equal(element.args, ())
    ntools.assert_equal(element.kwds, impl_params)
def test_no_params(self):
    """
    With an empty parameter dictionary, ``new_descriptor`` must build a
    DummyElementImpl that received neither positional nor keyword
    arguments beyond the type label and UUID.
    """
    element_factory = DescriptorElementFactory(DummyElementImpl, {})

    type_label = 'type'
    uid = 'uuid'

    # The factory is expected to instantiate the implementation class
    # under the hood.
    element = element_factory.new_descriptor(type_label, uid)

    ntools.assert_is_instance(element, DummyElementImpl)
    ntools.assert_equal(element._type_label, type_label)
    ntools.assert_equal(element._uuid, uid)
    ntools.assert_equal(element.args, ())
    ntools.assert_equal(element.kwds, {})
def test_call(self):
    """
    Calling the factory object directly must behave like ``new_descriptor``
    does in ``test_with_params``.
    """
    vec = numpy.random.randint(0, 10, 10)
    impl_params = {'p1': 'some dir', 'vec': vec}
    element_factory = DescriptorElementFactory(DummyElementImpl, impl_params)

    type_label = 'type'
    uid = 'uuid'

    # Use the __call__ entry point instead of new_descriptor.
    element = element_factory(type_label, uid)

    self.assertIsInstance(element, DummyElementImpl)
    self.assertEqual(element._type_label, type_label)
    self.assertEqual(element._uuid, uid)
    self.assertEqual(element.args, ())
    self.assertEqual(element.kwds, impl_params)
def test_simple_classification(self):
    """
    simple LibSvmClassifier test - 2-class

    Test libSVM classification functionality using random constructed
    data, training the y=0.5 split. Points whose second coordinate lies
    strictly between 0.45 and 0.55 are left out of both classes.

    After training, fresh random points are classified one at a time and
    the results are compared against ``classify_async`` (default call, and
    with ``use_multiprocessing`` both off and on).
    """
    DIM = 2
    N = 1000
    POS_LABEL = 'positive'
    NEG_LABEL = 'negative'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    c_factory = ClassificationElementFactory(
        MemoryClassificationElement, {})

    def make_element(argtup):
        # ``argtup`` is an (index, vector) pair as produced by enumerate.
        (i, v) = argtup
        d = d_factory.new_descriptor('test', i)
        d.set_vector(v)
        return d

    p = multiprocessing.pool.ThreadPool()
    # try/finally: release the pool's worker threads even when one of the
    # assertions below fails.
    try:
        # Constructing artificial descriptors
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]
        d_pos = p.map(make_element, enumerate(x_pos))
        d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': '',  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

        # Test classifier
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]
        # Indices start at N so test elements do not reuse training UUIDs.
        d_pos = p.map(make_element, enumerate(x_pos, N))
        d_neg = p.map(make_element, enumerate(x_neg, N + N // 2))

        d_pos_sync = {}  # for comparing to async
        for d in d_pos:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), POS_LABEL,
                "Found False positive: %s :: %s"
                % (d.vector(), c.get_classification()))
            d_pos_sync[d] = c

        d_neg_sync = {}
        for d in d_neg:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), NEG_LABEL,
                "Found False negative: %s :: %s"
                % (d.vector(), c.get_classification()))
            d_neg_sync[d] = c

        # test that async classify produces the same results
        # -- d_pos
        m_pos = classifier.classify_async(d_pos, c_factory)
        ntools.assert_equal(
            m_pos, d_pos_sync,
            "Async computation of pos set did not yield "
            "the same results as synchronous "
            "classification.")
        # -- d_neg
        m_neg = classifier.classify_async(d_neg, c_factory)
        ntools.assert_equal(
            m_neg, d_neg_sync,
            "Async computation of neg set did not yield "
            "the same results as synchronous "
            "classification.")
        # -- combined -- threaded
        combined_truth = dict(d_pos_sync.items())
        combined_truth.update(d_neg_sync)
        m_combined = classifier.classify_async(
            d_pos + d_neg, c_factory,
            use_multiprocessing=False,
        )
        ntools.assert_equal(
            m_combined, combined_truth,
            "Async computation of all test descriptors "
            "did not yield the same results as "
            "synchronous classification.")
        # -- combined -- multiprocess
        m_combined = classifier.classify_async(
            d_pos + d_neg, c_factory,
            use_multiprocessing=True,
        )
        ntools.assert_equal(
            m_combined, combined_truth,
            "Async computation of all test descriptors "
            "(mixed order) did not yield the same results "
            "as synchronous classification.")
    finally:
        # Closing resources
        p.close()
        p.join()
def test_no_save_model_pickle(self):
    """
    The trained model must survive pickling even when the classifier was
    constructed without any model cache file paths.
    """
    svm_params = {
        '-t': 0,  # linear kernel
        '-b': 1,  # enable probability estimates
        '-c': 2,  # SVM-C parameter C
        '-q': '',  # quiet mode
    }
    classifier = LibSvmClassifier(
        train_params=svm_params,
        normalize=None,  # DO NOT normalize descriptors
    )
    ntools.assert_true(classifier.svm_model is None)
    # An untrained classifier has nothing to embed, so the pickle state
    # should carry no __LOCAL__ content; a round trip must still work.
    ntools.assert_not_in('__LOCAL__', classifier.__getstate__())
    cPickle.loads(cPickle.dumps(classifier))

    # Train an arbitrary two-class model on random 2D points.
    dim = 2
    n_samples = 1000
    pos_label = 'positive'
    neg_label = 'negative'
    descr_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    class_factory = ClassificationElementFactory(
        MemoryClassificationElement, {})

    def make_element(indexed_vector):
        idx, vec = indexed_vector
        elem = descr_factory.new_descriptor('test', idx)
        elem.set_vector(vec)
        return elem

    # Constructing artificial descriptors
    samples = numpy.random.rand(n_samples, dim)
    low_samples = samples[samples[:, 1] <= 0.45]
    high_samples = samples[samples[:, 1] >= 0.55]
    pool = multiprocessing.pool.ThreadPool()
    pos_elements = pool.map(make_element, enumerate(low_samples))
    neg_elements = pool.map(make_element,
                            enumerate(high_samples, start=n_samples // 2))
    pool.close()
    pool.join()

    # Training
    classifier.train({pos_label: pos_elements, neg_label: neg_elements})

    # Classify one query descriptor with the original classifier.
    query_vector = numpy.random.rand(dim)
    query = descr_factory.new_descriptor('query', 0)
    query.set_vector(query_vector)
    expected = classifier.classify(query, class_factory)

    # With a trained model the pickle state should now embed it.
    state = classifier.__getstate__()
    for key in ('__LOCAL__', '__LOCAL_LABELS__', '__LOCAL_MODEL__'):
        ntools.assert_in(key, state)
    ntools.assert_true(len(state['__LOCAL_LABELS__']) > 0)
    ntools.assert_true(len(state['__LOCAL_MODEL__']) > 0)

    # A classifier restored from the pickle should classify the same
    # query descriptor the same way.
    #: :type: LibSvmClassifier
    restored = cPickle.loads(cPickle.dumps(classifier))
    actual = restored.classify(query, class_factory)

    # Compare per-label confidences after rounding, since floating point
    # error may creep in across the round trip.
    for label in (pos_label, neg_label):
        ntools.assert_almost_equal(expected[label], actual[label], 5)
def test_simple_multiclass_classification(self):
    """
    simple LibSvmClassifier test - 3-class

    Test libSVM classification functionality using random constructed
    data, training the y=0.33 and y=.66 split. Points whose second
    coordinate falls in the gaps between the three bands (0.30-0.36 and
    0.63-0.69) are left out.

    After training, fresh random points are classified one at a time and
    the results are compared against ``classify_async`` (default call, and
    with ``use_multiprocessing`` both off and on).
    """
    DIM = 2
    N = 1000
    P1_LABEL = 'p1'
    P2_LABEL = 'p2'
    P3_LABEL = 'p3'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    c_factory = ClassificationElementFactory(
        MemoryClassificationElement, {})
    # Running index so every generated element gets a distinct UUID.
    di = 0

    def make_element(argtup):
        # ``argtup`` is an (index, vector) pair as produced by enumerate.
        (i, v) = argtup
        d = d_factory.new_descriptor('test', i)
        d.set_vector(v)
        return d

    p = multiprocessing.pool.ThreadPool()
    # try/finally: release the pool's worker threads even when one of the
    # assertions below fails.
    try:
        # Constructing artificial descriptors
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_p3 = x[x[:, 1] >= 0.69]
        d_p1 = p.map(make_element, enumerate(x_p1, di))
        di += len(d_p1)
        d_p2 = p.map(make_element, enumerate(x_p2, di))
        di += len(d_p2)
        d_p3 = p.map(make_element, enumerate(x_p3, di))
        di += len(d_p3)

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': ''  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2, P3_LABEL: d_p3})

        # Test classifier
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_p3 = x[x[:, 1] >= 0.69]
        d_p1 = p.map(make_element, enumerate(x_p1, di))
        di += len(d_p1)
        d_p2 = p.map(make_element, enumerate(x_p2, di))
        di += len(d_p2)
        d_p3 = p.map(make_element, enumerate(x_p3, di))

        d_p1_sync = {}
        for d in d_p1:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), P1_LABEL,
                "Incorrect %s label: %s :: %s"
                % (P1_LABEL, d.vector(), c.get_classification()))
            d_p1_sync[d] = c

        d_p2_sync = {}
        for d in d_p2:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), P2_LABEL,
                "Incorrect %s label: %s :: %s"
                % (P2_LABEL, d.vector(), c.get_classification()))
            d_p2_sync[d] = c

        d_p3_sync = {}
        for d in d_p3:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), P3_LABEL,
                "Incorrect %s label: %s :: %s"
                % (P3_LABEL, d.vector(), c.get_classification()))
            d_p3_sync[d] = c

        # test that async classify produces the same results
        # -- p1
        async_p1 = classifier.classify_async(d_p1, c_factory)
        ntools.assert_equal(
            async_p1, d_p1_sync,
            "Async computation of p1 set did not yield "
            "the same results as synchronous computation.")
        # -- p2
        async_p2 = classifier.classify_async(d_p2, c_factory)
        ntools.assert_equal(
            async_p2, d_p2_sync,
            "Async computation of p2 set did not yield "
            "the same results as synchronous computation.")
        # -- p3
        async_p3 = classifier.classify_async(d_p3, c_factory)
        ntools.assert_equal(
            async_p3, d_p3_sync,
            "Async computation of p3 set did not yield "
            "the same results as synchronous computation.")
        # -- combined -- threaded
        sync_combined = dict(d_p1_sync.items())
        sync_combined.update(d_p2_sync)
        sync_combined.update(d_p3_sync)
        async_combined = classifier.classify_async(
            d_p1 + d_p2 + d_p3, c_factory,
            use_multiprocessing=False)
        ntools.assert_equal(
            async_combined, sync_combined,
            "Async computation of all test descriptors "
            "did not yield the same results as "
            "synchronous classification.")
        # -- combined -- multiprocess
        async_combined = classifier.classify_async(
            d_p1 + d_p2 + d_p3, c_factory,
            use_multiprocessing=True)
        ntools.assert_equal(
            async_combined, sync_combined,
            "Async computation of all test descriptors "
            "(mixed order) did not yield the same results "
            "as synchronous classification.")
    finally:
        # Closing resources
        p.close()
        p.join()
import abc import numpy from smqtk.algorithms import SmqtkAlgorithm from smqtk.representation import DescriptorElementFactory from smqtk.representation.descriptor_element.local_elements import \ DescriptorMemoryElement from smqtk.utils import ContentTypeValidator from smqtk.utils.parallel import parallel_map DFLT_DESCRIPTOR_FACTORY = DescriptorElementFactory(DescriptorMemoryElement, {}) class DescriptorGenerator (SmqtkAlgorithm, ContentTypeValidator): """ Base abstract Feature Descriptor interface """ def compute_descriptor(self, data, descr_factory=DFLT_DESCRIPTOR_FACTORY, overwrite=False): """ Given some data, return a descriptor element containing a descriptor vector. :raises RuntimeError: Descriptor extraction failure of some kind. :raises ValueError: Given data element content was not of a valid type with respect to this descriptor. :param data: Some kind of input data for the feature descriptor. :type data: smqtk.representation.DataElement
file_element_config = { 'save_dir': ROOT_DIR, 'subdir_split': 10, } psql_element_config = { 'db_name': 'smqtk', 'db_host': 'localhost', 'db_port': 6432, # PgBouncer port 'db_user': '******', 'db_pass': '******', } file_element_factory = DescriptorElementFactory( DescriptorFileElement, file_element_config, ) psql_element_factory = DescriptorElementFactory( PostgresDescriptorElement, psql_element_config, ) fname_re = re.compile('(\w+)\.(\w+)\.vector\.npy') def transfer_vector(type_str, uuid_str): pd = psql_element_factory(type_str, uuid_str) if not pd.has_vector(): fd = file_element_factory(type_str, uuid_str) # removing the "-0" artifacts
import logging import multiprocessing import multiprocessing.pool import os.path as osp import shutil import uuid from smqtk.algorithms.relevancy_index import get_relevancy_index_impls from smqtk.representation import DescriptorElementFactory from smqtk.representation.descriptor_element.local_elements import DescriptorMemoryElement from smqtk.representation.descriptor_index.memory import DescriptorMemoryIndex from smqtk.utils import SmqtkObject from smqtk.utils import plugin from smqtk.utils import file_utils DFLT_MEMORY_DESCR_FACTORY = DescriptorElementFactory(DescriptorMemoryElement, {}) DFLT_REL_INDEX_CONFIG = { "type": "LibSvmHikRelevancyIndex", "LibSvmHikRelevancyIndex": { "descr_cache_filepath": None, } } class IqrResultsDict(dict): """ Dictionary subclass for containing DescriptorElement-to-float mapping. We expect keys to be DescriptorElement instances and the values to be floats between [0,1], inclusive.
def test_simple_multiclass_classification(self):
    """
    Test libSVM classification functionality using random constructed
    data, training the y=0.33 and y=.66 split.

    Two labelled classes are trained against a separate set of negative
    examples; held-out points from the third band are expected to come
    back as ``LibSvmClassifier.NEGATIVE_LABEL``.
    """
    DIM = 2
    N = 1000
    P1_LABEL = 'p1'
    P2_LABEL = 'p2'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    c_factory = ClassificationElementFactory(MemoryClassificationElement, {})
    # Running index so every generated element gets a distinct UUID.
    di = 0

    def make_element(argtup):
        # Unpack in the body: tuple parameters in the signature are a
        # SyntaxError on Python 3.
        (i, v) = argtup
        d = d_factory.new_descriptor('test', i)
        d.set_vector(v)
        return d

    p = multiprocessing.pool.ThreadPool()
    # try/finally: release the pool's worker threads even when one of the
    # assertions below fails.
    try:
        # Constructing artificial descriptors
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_neg = x[x[:, 1] >= 0.69]
        d_p1 = p.map(make_element, enumerate(x_p1, di))
        di += len(d_p1)
        d_p2 = p.map(make_element, enumerate(x_p2, di))
        di += len(d_p2)
        d_neg = p.map(make_element, enumerate(x_neg, di))
        di += len(d_neg)

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': ''  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2}, d_neg)

        # Test classifier
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_neg = x[x[:, 1] >= 0.69]
        d_p1 = p.map(make_element, enumerate(x_p1, di))
        di += len(d_p1)
        d_p2 = p.map(make_element, enumerate(x_p2, di))
        di += len(d_p2)
        d_neg = p.map(make_element, enumerate(x_neg, di))

        for d in d_p1:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), P1_LABEL,
                "Incorrect %s label: %s :: %s"
                % (P1_LABEL, d.vector(), c.get_classification()))
        for d in d_p2:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), P2_LABEL,
                "Incorrect %s label: %s :: %s"
                % (P2_LABEL, d.vector(), c.get_classification()))
        for d in d_neg:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), LibSvmClassifier.NEGATIVE_LABEL,
                "Incorrect %s label: %s :: %s"
                % (LibSvmClassifier.NEGATIVE_LABEL, d.vector(),
                   c.get_classification()))
    finally:
        # Closing resources
        p.close()
        p.join()
def test_simple_classification(self):
    """
    Test libSVM classification functionality using random constructed
    data, training the y=0.5 split.

    One labelled positive class is trained against a separate set of
    negative examples; held-out points from the negative side are expected
    to come back as ``LibSvmClassifier.NEGATIVE_LABEL``.
    """
    DIM = 2
    N = 1000
    POS_LABEL = 'positive'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    c_factory = ClassificationElementFactory(MemoryClassificationElement, {})

    def make_element(argtup):
        # Unpack in the body: tuple parameters in the signature are a
        # SyntaxError on Python 3.
        (i, v) = argtup
        d = d_factory.new_descriptor('test', i)
        d.set_vector(v)
        return d

    p = multiprocessing.pool.ThreadPool()
    # try/finally: release the pool's worker threads even when one of the
    # assertions below fails.
    try:
        # Constructing artificial descriptors
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]
        d_pos = p.map(make_element, enumerate(x_pos))
        d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': '',  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({POS_LABEL: d_pos}, d_neg)

        # Test classifier
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]
        # Indices start at N so test elements do not reuse training UUIDs.
        d_pos = p.map(make_element, enumerate(x_pos, N))
        d_neg = p.map(make_element, enumerate(x_neg, N + N // 2))

        for d in d_pos:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), POS_LABEL,
                "Found False positive: %s :: %s"
                % (d.vector(), c.get_classification()))
        for d in d_neg:
            c = classifier.classify(d, c_factory)
            ntools.assert_equal(
                c.max_label(), LibSvmClassifier.NEGATIVE_LABEL,
                "Found False negative: %s :: %s"
                % (d.vector(), c.get_classification()))
    finally:
        # Closing resources
        p.close()
        p.join()
def test_simple_multiclass_classification(self):
    """
    simple LibSvmClassifier test - 3-class

    Test libSVM classification functionality using random constructed
    data, training the y=0.33 and y=.66 split. Points whose second
    coordinate falls in the gaps between the three bands (0.30-0.36 and
    0.63-0.69) are left out.

    Fresh random vectors are then passed to ``_classify_arrays`` and, for
    each band, the band's own label must score strictly higher than the
    other two.
    """
    DIM = 2
    N = 1000
    P1_LABEL = 'p1'
    P2_LABEL = 'p2'
    P3_LABEL = 'p3'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    # Running index so every generated element gets a distinct UUID.
    di = 0

    def make_element(iv):
        # ``iv`` is an (index, vector) pair as produced by enumerate.
        i, v = iv
        elem = d_factory.new_descriptor('test', i)
        elem.set_vector(v)
        return elem

    # Constructing artificial descriptors
    x = numpy.random.rand(N, DIM)
    x_p1 = x[x[:, 1] <= 0.30]
    x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
    x_p3 = x[x[:, 1] >= 0.69]

    # The pool is only needed to build the training elements, so release it
    # right away and in a ``finally`` so a failure cannot leak its threads.
    p = multiprocessing.pool.ThreadPool()
    try:
        d_p1 = p.map(make_element, enumerate(x_p1, di))
        di += len(d_p1)
        d_p2 = p.map(make_element, enumerate(x_p2, di))
        di += len(d_p2)
        d_p3 = p.map(make_element, enumerate(x_p3, di))
    finally:
        # Closing resources
        p.close()
        p.join()

    # Create/Train test classifier
    classifier = LibSvmClassifier(
        train_params={
            '-t': 0,  # linear kernel
            '-b': 1,  # enable probability estimates
            '-c': 2,  # SVM-C parameter C
            '-q': ''  # quiet mode
        },
        normalize=None,  # DO NOT normalize descriptors
    )
    classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2, P3_LABEL: d_p3})

    # Test classifier
    x = numpy.random.rand(N, DIM)
    x_p1 = x[x[:, 1] <= 0.30]
    x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
    x_p3 = x[x[:, 1] >= 0.69]

    # Test that examples expected to classify to certain classes are.
    c_map_p1 = list(classifier._classify_arrays(x_p1))
    for v, c_map in zip(x_p1, c_map_p1):
        assert c_map[P1_LABEL] > max(c_map[P2_LABEL], c_map[P3_LABEL]), \
            "Incorrect {} label: {} :: {}".format(P1_LABEL, v, c_map)

    c_map_p2 = list(classifier._classify_arrays(x_p2))
    for v, c_map in zip(x_p2, c_map_p2):
        assert c_map[P2_LABEL] > max(c_map[P1_LABEL], c_map[P3_LABEL]), \
            "Incorrect {} label: {} :: {}".format(P2_LABEL, v, c_map)

    c_map_p3 = list(classifier._classify_arrays(x_p3))
    for v, c_map in zip(x_p3, c_map_p3):
        assert c_map[P3_LABEL] > max(c_map[P1_LABEL], c_map[P2_LABEL]), \
            "Incorrect {} label: {} :: {}".format(P3_LABEL, v, c_map)
def test_simple_classification(self):
    """
    simple LibSvmClassifier test - 2-class

    Test libSVM classification functionality using random constructed
    data, training the y=0.5 split. Points whose second coordinate lies
    strictly between 0.45 and 0.55 are left out of both classes.

    Fresh random vectors are then passed to ``_classify_arrays`` and each
    side's own label must score strictly higher than the other label.
    """
    DIM = 2
    N = 1000
    POS_LABEL = 'positive'
    NEG_LABEL = 'negative'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

    def make_element(iv):
        # ``iv`` is an (index, vector) pair as produced by enumerate.
        i, v = iv
        elem = d_factory.new_descriptor('test', i)
        elem.set_vector(v)
        return elem

    # Constructing artificial descriptors
    x = numpy.random.rand(N, DIM)
    x_pos = x[x[:, 1] <= 0.45]
    x_neg = x[x[:, 1] >= 0.55]

    # The pool is only needed to build the training elements, so release it
    # right away and in a ``finally`` so a failure cannot leak its threads.
    p = multiprocessing.pool.ThreadPool()
    try:
        d_pos = p.map(make_element, enumerate(x_pos))
        d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))
    finally:
        # Closing resources
        p.close()
        p.join()

    # Create/Train test classifier
    classifier = LibSvmClassifier(
        train_params={
            '-t': 0,  # linear kernel
            '-b': 1,  # enable probability estimates
            '-c': 2,  # SVM-C parameter C
            '-q': '',  # quiet mode
        },
        normalize=None,  # DO NOT normalize descriptors
    )
    classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

    # Test classifier
    x = numpy.random.rand(N, DIM)
    x_pos = x[x[:, 1] <= 0.45]
    x_neg = x[x[:, 1] >= 0.55]

    # Test that examples expected to classify to the positive class are,
    # and same for those expected to be in the negative class.
    c_map_pos = list(classifier._classify_arrays(x_pos))
    for v, c_map in zip(x_pos, c_map_pos):
        assert c_map[POS_LABEL] > c_map[NEG_LABEL], \
            "Found False positive: {} :: {}" \
            .format(v, c_map)

    c_map_neg = list(classifier._classify_arrays(x_neg))
    for v, c_map in zip(x_neg, c_map_neg):
        assert c_map[NEG_LABEL] > c_map[POS_LABEL], \
            "Found False negative: {} :: {}" \
            .format(v, c_map)